from math import sqrt


-class PerformanceTestResult:
+class PerformanceTestResult(object):
+    """PerformanceTestResult holds the results from executing an individual
+    benchmark from the Swift Benchmark Suite, as reported by the test driver
+    (Benchmark_O, Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver).
+
+    It depends on the log format emitted by the test driver in the form:
+    #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
+
+    The last column, MAX_RSS, is emitted only for runs instrumented by the
+    Benchmark_Driver to measure rough memory use during the execution of the
+    benchmark.
+    """

    def __init__(self, csv_row):
+        """PerformanceTestResult instance is created from an iterable with
+        length of 8 or 9. (Like a row provided by the CSV parser.)
+        """
        # csv_row[0] is just an ordinal number of the test - skip that
        self.name = csv_row[1]          # Name of the performance test
        self.samples = int(csv_row[2])  # Number of measurement samples taken
@@ -36,25 +50,41 @@ def __init__(self, csv_row):
        self.median = int(csv_row[7])   # Median runtime (μs)
        self.max_rss = (                # Maximum Resident Set Size (B)
            int(csv_row[8]) if len(csv_row) > 8 else None)
-        # TODO if we really want to compute mean MAX_RSS: self.S_memory
+
+    def __repr__(self):
+        return (
+            '<PerformanceTestResult name:{0.name!r} '
+            'samples:{0.samples!r} min:{0.min!r} max:{0.max!r} '
+            'mean:{0.mean!r} sd:{0.sd!r} median:{0.median!r}>'.format(self))

    @property
-    def sd(self):  # Standard Deviation (μs)
+    def sd(self):
+        """Standard Deviation (μs)"""
        return (0 if self.samples < 2 else
                sqrt(self.S_runtime / (self.samples - 1)))

-    # Compute running variance, B. P. Welford's method
-    # See Knuth TAOCP vol 2, 3rd edition, page 232, or
-    # https://www.johndcook.com/blog/standard_deviation/
-    # M is mean, Standard Deviation is defined as sqrt(S/k-1)
    @staticmethod
    def running_mean_variance((k, M_, S_), x):
+        """
+        Compute running variance using B. P. Welford's method.
+        See Knuth TAOCP vol 2, 3rd edition, page 232, or
+        https://www.johndcook.com/blog/standard_deviation/
+        M is the mean; Standard Deviation is defined as sqrt(S / (k - 1))
+        """
        k = float(k + 1)
        M = M_ + (x - M_) / k
        S = S_ + (x - M_) * (x - M)
        return (k, M, S)

    def merge(self, r):
+        """Merging test results recomputes min and max.
+        It attempts to recompute mean and standard deviation when all the
+        samples are available. There is no way to correctly recompute these
+        values from test results that summarize more than 3 samples.
+
+        The use case here is comparing test results parsed from concatenated
+        log files from multiple runs of the benchmark driver.
+        """
        self.min = min(self.min, r.min)
        self.max = max(self.max, r.max)
        # self.median = None  # unclear what to do here
@@ -65,23 +95,31 @@ def push(x):
        (self.samples, self.mean, self.S_runtime) = state

        # Merging test results with up to 3 samples is exact
-        # TODO investigate how to best handle merge of higher sample counts
-        values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
+        values = [r.min, r.max, r.median][:min(r.samples, 3)]
        map(push, values)
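+        # With samples <= 3, [min, max, median] are exactly the original
+        # samples, so pushing them through Welford's method recomputes the
+        # exact mean and variance; with more samples this is only approximate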

+    # Column labels for the header row in the results table
    header = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')

-    # Tuple of values formatted for display in results table:
-    # (name, min value, max value, mean value, max_rss)
    def values(self):
-        return (self.name, str(self.min), str(self.max), str(int(self.mean)),
-                str(self.max_rss) if self.max_rss else '-')
-
-
-class ResultComparison:
+        """Values formatted for display in the results table,
+        in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS').
+        """
+        return (
+            self.name,
+            str(self.min), str(self.max), str(int(self.mean)),
+            str(self.max_rss) if self.max_rss else '—'
+        )
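+
+    # values() returns display-ready strings, e.g. a hypothetical result
+    # might yield ('BitCount', '3', '4', '4', '10192896')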
+
+
+class ResultComparison(object):
+    """ResultComparison compares MINs from new and old PerformanceTestResult.
+    It computes the speedup ratio and the improvement delta (%).
+    """
    def __init__(self, old, new):
        self.old = old
        self.new = new
+        assert old.name == new.name
        self.name = old.name  # Test name, convenience accessor

        # Speedup ratio
@@ -91,27 +129,43 @@ def __init__(self, old, new):
        ratio = (new.min + 0.001) / (old.min + 0.001)
        self.delta = ((ratio - 1) * 100)
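+        # e.g. hypothetical old.min == 100 and new.min == 80 (μs) gives a
+        # delta of roughly -20.0%; the +0.001 terms presumably guard against
+        # division by zero when a minimum of 0 is reported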

-        self.is_dubious = (  # FIXME this is legacy
+        # Add ' (?)' to the speedup column as an indication of dubious
+        # changes: a result's MIN falls inside the (MIN, MAX) interval of
+        # the result it is being compared with.
+        self.is_dubious = (
            ' (?)' if ((old.min < new.min and new.min < old.max) or
                       (new.min < old.min and old.min < new.max))
            else '')
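+        # e.g. old(min=88, max=100) vs. new(min=91, max=93): new.min falls
+        # inside old's (MIN, MAX) interval, so the speedup gets the ' (?)'
+        # suffix (hypothetical values)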

+    # Column labels for the header row in the results table
    header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')

-    # Tuple of values formatted for display in results table:
-    # (name, old value, new value, delta [%], speedup ratio)
    def values(self):
-        return (self.name, str(self.old.min), str(self.new.min),
+        """Values formatted for display in the results table,
+        in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP').
+        """
+        return (self.name,
+                str(self.old.min), str(self.new.min),
                '{0:+.1f}%'.format(self.delta),
                '{0:.2f}x{1}'.format(self.ratio, self.is_dubious))


-class TestComparator:
-    def __init__(self, old_file, new_file, delta_threshold, changes_only):
+class TestComparator(object):
+    """TestComparator parses `PerformanceTestResult`s from CSV log files.
+    Then it determines which tests were `added`, `removed` and which can be
+    compared. It splits the `ResultComparison`s into 3 groups according to
+    the `delta_threshold` by the change in performance: `increased`,
+    `decreased` and `unchanged`.
+
+    The lists of `added`, `removed` and `unchanged` tests are sorted
+    alphabetically. The `increased` and `decreased` lists are sorted in
+    descending order by the amount of change.
+    """
+    def __init__(self, old_file, new_file, delta_threshold):

        def load_from_CSV(filename):  # handles output from Benchmark_O and
            def skip_totals(row):     # Benchmark_Driver (added MAX_RSS column)
-                return len(row) > 7 and row[0].isdigit()
+                return len(row) > 7 and row[0].isdigit()
            tests = map(PerformanceTestResult,
                        filter(skip_totals, csv.reader(open(filename))))
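+            # skip_totals (above) keeps only rows whose first column is a
+            # numeric ordinal, presumably dropping the driver's summary
+            # ("Totals") row and other non-measurement lines from the log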

@@ -131,9 +185,9 @@ def add_or_merge(names, r):
        added_tests = new_tests.difference(old_tests)
        removed_tests = old_tests.difference(new_tests)

-        self.added = sorted(map(lambda t: new_results[t], added_tests),
+        self.added = sorted([new_results[t] for t in added_tests],
                            key=lambda r: r.name)
-        self.removed = sorted(map(lambda t: old_results[t], removed_tests),
+        self.removed = sorted([old_results[t] for t in removed_tests],
                              key=lambda r: r.name)

        def compare(name):
@@ -144,24 +198,28 @@ def compare(name):
        def partition(l, p):
            return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
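+        # e.g. partition([1, 2, 3, 4], lambda x: x % 2 == 0) returns
+        # ([2, 4], [1, 3]): items satisfying the predicate go to the first
+        # list, the rest to the second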

-        # TODO take standard deviation (SD) into account
        decreased, not_decreased = partition(
            comparisons, lambda c: c.ratio < (1 - delta_threshold))
        increased, unchanged = partition(
            not_decreased, lambda c: c.ratio > (1 + delta_threshold))

        # sorted partitions
-        names = map(lambda c: c.name, comparisons)
+        names = [c.name for c in comparisons]
        comparisons = dict(zip(names, comparisons))
-        self.decreased = map(lambda c: comparisons[c.name],
-                             sorted(decreased, key=lambda c: -c.delta))
-        self.increased = map(lambda c: comparisons[c.name],
-                             sorted(increased, key=lambda c: c.delta))
-        self.unchanged = map(lambda c: comparisons[c.name],
-                             sorted(unchanged, key=lambda c: c.name))
-
-
-class ReportFormatter:
+        self.decreased = [comparisons[c.name]
+                          for c in sorted(decreased, key=lambda c: -c.delta)]
+        self.increased = [comparisons[c.name]
+                          for c in sorted(increased, key=lambda c: c.delta)]
+        self.unchanged = [comparisons[c.name]
+                          for c in sorted(unchanged, key=lambda c: c.name)]
+
+
+class ReportFormatter(object):
+    """ReportFormatter formats the `PerformanceTestResult`s and
+    `ResultComparison`s provided by `TestComparator`, using their `header` and
+    `values()`, into a report table. Supported formats are: `markdown` (used
+    for displaying benchmark results on GitHub), `git` (default) and `html`.
+    """
    def __init__(self, comparator, old_branch, new_branch, changes_only):
        self.comparator = comparator
        self.old_branch = old_branch
@@ -178,35 +236,34 @@ def __init__(self, comparator, old_branch, new_branch, changes_only):
{0} ({1}): {2}"""

    def markdown(self):
-        return self.__formatted_text(
+        return self._formatted_text(
            ROW='{0} | {1} | {2} | {3} | {4} \n',
            HEADER_SEPARATOR='---',
            DETAIL=self.MARKDOWN_DETAIL)

    def git(self):
-        return self.__formatted_text(
+        return self._formatted_text(
            ROW='{0}   {1}   {2}   {3}   {4} \n',
            HEADER_SEPARATOR='   ',
            DETAIL=self.GIT_DETAIL)

-    def __column_widths(self):
+    def _column_widths(self):
        changed = self.comparator.decreased + self.comparator.increased
        comparisons = (changed if self.changes_only else
                       changed + self.comparator.unchanged)
        comparisons += self.comparator.added + self.comparator.removed

-        values = map(lambda c: c.values(), comparisons)
        widths = map(lambda columns: map(len, columns),
                     [PerformanceTestResult.header, ResultComparison.header] +
-                     values)
+                     [c.values() for c in comparisons])

        def max_widths(maximum, widths):
            return tuple(map(max, zip(maximum, widths)))

        return reduce(max_widths, widths, tuple([0] * 5))
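+        # e.g. folding max_widths over rows of widths [(4, 3, 3, 4, 7),
+        # (9, 2, 2, 3, 1)] starting from (0, 0, 0, 0, 0) yields
+        # (9, 3, 3, 4, 7): the widest cell per column (hypothetical widths)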

-    def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
-        widths = self.__column_widths()
+    def _formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
+        widths = self._column_widths()

        def justify_columns(contents):
            return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents)))
@@ -343,7 +400,7 @@ def main():

    args = parser.parse_args()
    comparator = TestComparator(args.old_file, args.new_file,
-                                float(args.delta_threshold), args.changes_only)
+                                float(args.delta_threshold))
    formatter = ReportFormatter(comparator, args.old_branch, args.new_branch,
                                args.changes_only)
@@ -372,9 +429,8 @@ def write_to_file(file_name, data):
    """
    Write data to the given file.
    """
-    file = open(file_name, 'w')
-    file.write(data)
-    file.close
+    with open(file_name, 'w') as f:
+        f.write(data)
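+    # 'with' closes the file even if the write raises; the replaced code's
+    # bare 'file.close' was missing parentheses and never actually called
+    # close()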


if __name__ == '__main__':