
Commit 49ddd96

Added documentation and test coverage.
compare_perf_tests.py is now covered with unit tests and its public methods are documented in the implementation. Minor refactoring to better conform to Python conventions:

* classes declared in new style
* proper private method prefix of a single underscore
* replacing map with list comprehensions where it was clearer

Unit tests are executed as part of validation-test. .gitignore was modified to ignore the .coverage and htmlcov artifacts generated by the coverage.py package.
1 parent d402593 commit 49ddd96

File tree

4 files changed, +336 -47 lines changed


.gitignore (+2)

@@ -51,3 +51,5 @@ compile_commands.json
 8
 4
 SortedCFDatabase.def
+htmlcov
+.coverage

benchmark/scripts/compare_perf_tests.py (+103 -47)
@@ -21,8 +21,22 @@
 from math import sqrt
 
 
-class PerformanceTestResult:
+class PerformanceTestResult(object):
+    """PerformanceTestResult holds results from executing an individual
+    benchmark from the Swift Benchmark Suite as reported by the test driver
+    (Benchmark_O, Benchmark_Onone, Benchmark_Ounchecked or Benchmark_Driver).
+
+    It depends on the log format emitted by the test driver in the form:
+    #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B)
+
+    The last column, MAX_RSS, is emitted only for runs instrumented by the
+    Benchmark_Driver to measure rough memory use during the execution of the
+    benchmark.
+    """
     def __init__(self, csv_row):
+        """PerformanceTestResult instance is created from an iterable with
+        length of 8 or 9. (Like a row provided by the CSV parser.)
+        """
         # csv_row[0] is just an ordinal number of the test - skip that
         self.name = csv_row[1]          # Name of the performance test
         self.samples = int(csv_row[2])  # Number of measurement samples taken
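As an illustration (not part of the diff), a minimal sketch of feeding one log row in the documented format to the class above; the test name and all numbers are made up:

    # Hypothetical row: #,TEST,SAMPLES,MIN,MAX,MEAN,SD,MEDIAN,MAX_RSS
    row = '34,BitCount,20,3,4,4,0,4,10192896'.split(',')
    result = PerformanceTestResult(row)
    assert result.name == 'BitCount' and result.samples == 20
    assert (result.min, result.max, result.max_rss) == (3, 4, 10192896)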
@@ -36,25 +50,41 @@ def __init__(self, csv_row):
         self.median = int(csv_row[7])   # Median runtime (ms)
         self.max_rss = (                # Maximum Resident Set Size (B)
             int(csv_row[8]) if len(csv_row) > 8 else None)
-        # TODO if we really want to compute mean MAX_RSS: self.S_memory
+
+    def __repr__(self):
+        return (
+            '<PerformanceTestResult name:{0.name!r} '
+            'samples:{0.samples!r} min:{0.min!r} max:{0.max!r} '
+            'mean:{0.mean!r} sd:{0.sd!r} median:{0.median!r}>'.format(self))
 
     @property
-    def sd(self):  # Standard Deviation (ms)
+    def sd(self):
+        """Standard Deviation (ms)"""
         return (0 if self.samples < 2 else
                 sqrt(self.S_runtime / (self.samples - 1)))
 
-    # Compute running variance, B. P. Welford's method
-    # See Knuth TAOCP vol 2, 3rd edition, page 232, or
-    # https://www.johndcook.com/blog/standard_deviation/
-    # M is mean, Standard Deviation is defined as sqrt(S/k-1)
     @staticmethod
     def running_mean_variance((k, M_, S_), x):
+        """
+        Compute running variance, B. P. Welford's method
+        See Knuth TAOCP vol 2, 3rd edition, page 232, or
+        https://www.johndcook.com/blog/standard_deviation/
+        M is mean, Standard Deviation is defined as sqrt(S/k-1)
+        """
         k = float(k + 1)
         M = M_ + (x - M_) / k
         S = S_ + (x - M_) * (x - M)
         return (k, M, S)
 
     def merge(self, r):
+        """Merging test results recomputes min and max.
+        It attempts to recompute mean and standard deviation when all_samples
+        are available. There is no correct way to compute these values from
+        test results that are summaries of more than 3 samples.
+
+        The use case here is comparing test results parsed from concatenated
+        log files from multiple runs of the benchmark driver.
+        """
         self.min = min(self.min, r.min)
         self.max = max(self.max, r.max)
         # self.median = None # unclear what to do here
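A small worked check of the Welford update documented above, using three assumed runtime samples (3, 4, 5):

    from math import sqrt

    state = (1, 3.0, 0.0)  # (k, M, S) after an assumed first sample of 3
    for x in [4, 5]:       # two further assumed samples
        state = PerformanceTestResult.running_mean_variance(state, x)
    k, M, S = state
    assert (M, sqrt(S / (k - 1))) == (4.0, 1.0)  # running mean and sample SD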
@@ -65,23 +95,31 @@ def push(x):
             (self.samples, self.mean, self.S_runtime) = state
 
         # Merging test results with up to 3 samples is exact
-        # TODO investigate how to best handle merge of higher sample counts
-        values = [r.min, r.max, r.median, r.mean][:min(r.samples, 4)]
+        values = [r.min, r.max, r.median][:min(r.samples, 3)]
         map(push, values)
 
+    # Column labels for header row in results table
     header = ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS')
 
-    # Tuple of values formatted for display in results table:
-    # (name, min value, max value, mean value, max_rss)
     def values(self):
-        return (self.name, str(self.min), str(self.max), str(int(self.mean)),
-                str(self.max_rss) if self.max_rss else '-')
-
-
-class ResultComparison:
+        """Values property for display in results table comparisons
+        in format: ('TEST', 'MIN', 'MAX', 'MEAN', 'MAX_RSS').
+        """
+        return (
+            self.name,
+            str(self.min), str(self.max), str(int(self.mean)),
+            str(self.max_rss) if self.max_rss else '—'
+        )
+
+
+class ResultComparison(object):
+    """ResultComparison compares MINs from new and old PerformanceTestResult.
+    It computes speedup ratio and improvement delta (%).
+    """
     def __init__(self, old, new):
         self.old = old
         self.new = new
+        assert(old.name == new.name)
         self.name = old.name  # Test name, convenience accessor
 
         # Speedup ratio
@@ -91,27 +129,43 @@ def __init__(self, old, new):
         ratio = (new.min + 0.001) / (old.min + 0.001)
         self.delta = ((ratio - 1) * 100)
 
-        self.is_dubious = (  # FIXME this is legacy
+        # Add ' (?)' to the speedup column as indication of dubious changes:
+        # the result's MIN falls inside the (MIN, MAX) interval of the result
+        # it is being compared with.
+        self.is_dubious = (
             ' (?)' if ((old.min < new.min and new.min < old.max) or
                        (new.min < old.min and old.min < new.max))
             else '')
 
+    # Column labels for header row in results table
     header = ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP')
 
-    # Tuple of values formatted for display in results table:
-    # (name, old value, new value, delta [%], speedup ratio)
     def values(self):
-        return (self.name, str(self.old.min), str(self.new.min),
+        """Values property for display in results table comparisons
+        in format: ('TEST', 'OLD', 'NEW', 'DELTA', 'SPEEDUP').
+        """
+        return (self.name,
+                str(self.old.min), str(self.new.min),
                 '{0:+.1f}%'.format(self.delta),
                 '{0:.2f}x{1}'.format(self.ratio, self.is_dubious))
 
 
-class TestComparator:
-    def __init__(self, old_file, new_file, delta_threshold, changes_only):
+class TestComparator(object):
+    """TestComparator parses `PerformanceTestResult`s from CSV log files.
+    Then it determines which tests were `added`, `removed` and which can be
+    compared. It then splits the `ResultComparison`s into 3 groups according
+    to the `delta_threshold` by the change in performance: `increased`,
+    `decreased` and `unchanged`.
+
+    The lists of `added`, `removed` and `unchanged` tests are sorted
+    alphabetically. The `increased` and `decreased` lists are sorted in
+    descending order by the amount of change.
+    """
+    def __init__(self, old_file, new_file, delta_threshold):
 
         def load_from_CSV(filename):  # handles output from Benchmark_O and
             def skip_totals(row):  # Benchmark_Driver (added MAX_RSS column)
-                return len(row) > 7 and row[0].isdigit()
+                return len(row) > 7 and row[0].isdigit()
             tests = map(PerformanceTestResult,
                         filter(skip_totals, csv.reader(open(filename))))
 
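To make the ratio, delta, and dubious-change marker concrete, a sketch with two hypothetical results whose new MIN (80) falls inside the old result's (MIN, MAX) range of (75, 90):

    old = PerformanceTestResult('0,Dummy,10,75,90,82,0,81'.split(','))
    new = PerformanceTestResult('0,Dummy,10,80,85,83,0,82'.split(','))
    comparison = ResultComparison(old, new)
    assert comparison.is_dubious == ' (?)'   # because 75 < 80 < 90
    assert comparison.values()[3:] == ('+6.7%', '1.07x (?)')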
@@ -131,9 +185,9 @@ def add_or_merge(names, r):
         added_tests = new_tests.difference(old_tests)
         removed_tests = old_tests.difference(new_tests)
 
-        self.added = sorted(map(lambda t: new_results[t], added_tests),
+        self.added = sorted([new_results[t] for t in added_tests],
                             key=lambda r: r.name)
-        self.removed = sorted(map(lambda t: old_results[t], removed_tests),
+        self.removed = sorted([old_results[t] for t in removed_tests],
                               key=lambda r: r.name)
 
         def compare(name):
@@ -144,24 +198,28 @@ def compare(name):
         def partition(l, p):
             return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))
 
-        # TODO take standard deviation (SD) into account
         decreased, not_decreased = partition(
             comparisons, lambda c: c.ratio < (1 - delta_threshold))
         increased, unchanged = partition(
             not_decreased, lambda c: c.ratio > (1 + delta_threshold))
 
         # sorted partitions
-        names = map(lambda c: c.name, comparisons)
+        names = [c.name for c in comparisons]
         comparisons = dict(zip(names, comparisons))
-        self.decreased = map(lambda c: comparisons[c.name],
-                             sorted(decreased, key=lambda c: -c.delta))
-        self.increased = map(lambda c: comparisons[c.name],
-                             sorted(increased, key=lambda c: c.delta))
-        self.unchanged = map(lambda c: comparisons[c.name],
-                             sorted(unchanged, key=lambda c: c.name))
-
-
-class ReportFormatter:
+        self.decreased = [comparisons[c.name]
+                          for c in sorted(decreased, key=lambda c: -c.delta)]
+        self.increased = [comparisons[c.name]
+                          for c in sorted(increased, key=lambda c: c.delta)]
+        self.unchanged = [comparisons[c.name]
+                          for c in sorted(unchanged, key=lambda c: c.name)]
+
+
+class ReportFormatter(object):
+    """ReportFormatter formats the `PerformanceTestResult`s and
+    `ResultComparison`s provided by `TestComparator` using their `header` and
+    `values()` into a report table. Supported formats are: `markdown` (used
+    for displaying benchmark results on GitHub), `git` (default) and `html`.
+    """
     def __init__(self, comparator, old_branch, new_branch, changes_only):
         self.comparator = comparator
         self.old_branch = old_branch
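The reduce-based partition helper above is nested inside __init__; a standalone copy, just for illustration, splits a list by a predicate like this:

    def partition(l, p):
        return reduce(lambda x, y: x[not p(y)].append(y) or x, l, ([], []))

    fast, rest = partition([0.8, 1.2, 0.9, 1.5], lambda ratio: ratio < 1.0)
    assert fast == [0.8, 0.9] and rest == [1.2, 1.5]

This mirrors how the comparisons are split first into decreased vs. not_decreased and then into increased vs. unchanged by the delta_threshold.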
@@ -178,35 +236,34 @@ def __init__(self, comparator, old_branch, new_branch, changes_only):
 {0} ({1}): {2}"""
 
     def markdown(self):
-        return self.__formatted_text(
+        return self._formatted_text(
             ROW='{0} | {1} | {2} | {3} | {4} \n',
             HEADER_SEPARATOR='---',
             DETAIL=self.MARKDOWN_DETAIL)
 
     def git(self):
-        return self.__formatted_text(
+        return self._formatted_text(
             ROW='{0} {1} {2} {3} {4} \n',
             HEADER_SEPARATOR=' ',
             DETAIL=self.GIT_DETAIL)
 
-    def __column_widths(self):
+    def _column_widths(self):
         changed = self.comparator.decreased + self.comparator.increased
         comparisons = (changed if self.changes_only else
                        changed + self.comparator.unchanged)
         comparisons += self.comparator.added + self.comparator.removed
 
-        values = map(lambda c: c.values(), comparisons)
         widths = map(lambda columns: map(len, columns),
                      [PerformanceTestResult.header, ResultComparison.header] +
-                     values)
+                     [c.values() for c in comparisons])
 
         def max_widths(maximum, widths):
             return tuple(map(max, zip(maximum, widths)))
 
         return reduce(max_widths, widths, tuple([0] * 5))
 
-    def __formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
-        widths = self.__column_widths()
+    def _formatted_text(self, ROW, HEADER_SEPARATOR, DETAIL):
+        widths = self._column_widths()
 
         def justify_columns(contents):
             return tuple(map(lambda (w, c): c.ljust(w), zip(widths, contents)))
@@ -343,7 +400,7 @@ def main():
 
     args = parser.parse_args()
     comparator = TestComparator(args.old_file, args.new_file,
-                                float(args.delta_threshold), args.changes_only)
+                                float(args.delta_threshold))
     formatter = ReportFormatter(comparator, args.old_branch, args.new_branch,
                                 args.changes_only)
 
@@ -372,9 +429,8 @@ def write_to_file(file_name, data):
     """
     Write data to given file
     """
-    file = open(file_name, 'w')
-    file.write(data)
-    file.close
+    with open(file_name, 'w') as f:
+        f.write(data)
 
 
 if __name__ == '__main__':
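Roughly how main() wires the refactored pieces together after this change; the file names, branch names, and 0.05 threshold below are hypothetical:

    comparator = TestComparator('old_run.log', 'new_run.log', 0.05)
    formatter = ReportFormatter(comparator, 'master', 'my-branch',
                                changes_only=False)
    write_to_file('report.md', formatter.markdown())  # or formatter.git()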
