Skip to content

Commit 855009a

Browse files
committed
benchmark: use t-test for comparing node versions
The data sampling is done in node and the data processing is done in R. Only plyr was added as an R dependency and it is fairly standard. PR-URL: #7094 Reviewed-By: Trevor Norris <[email protected]> Reviewed-By: Jeremiah Senkpiel <[email protected]> Reviewed-By: Brian White <[email protected]> Reviewed-By: Anna Henningsen <[email protected]>
1 parent 8bb59fd commit 855009a

File tree

3 files changed

+161
-162
lines changed

3 files changed

+161
-162
lines changed

benchmark/_cli.R

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
2+
args = commandArgs(TRUE);
3+
4+
args.options = list();
5+
6+
temp.option.key = NULL;
7+
8+
for (arg in args) {
9+
# Optional arguments declaration
10+
if (substring(arg, 1, 1) == '-') {
11+
temp.option.key = substring(arg, 2);
12+
if (substring(arg, 2, 2) == '-') {
13+
temp.option.key = substring(arg, 3);
14+
}
15+
16+
args.options[[temp.option.key]] = TRUE;
17+
}
18+
# Optional arguments value
19+
else if (!is.null(temp.option.key)) {
20+
args.options[[temp.option.key]] = arg;
21+
22+
temp.option.key = NULL;
23+
}
24+
}

benchmark/compare.R

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env Rscript
2+
library(ggplot2);
3+
library(plyr);
4+
5+
# get __dirname and load ./_cli.R
6+
args = commandArgs(trailingOnly = F);
7+
dirname = dirname(sub("--file=", "", args[grep("--file", args)]));
8+
source(paste0(dirname, '/_cli.R'), chdir=T);
9+
10+
if (!is.null(args.options$help) ||
11+
(!is.null(args.options$plot) && args.options$plot == TRUE)) {
12+
stop("usage: cat file.csv | Rscript compare.R
13+
--help show this message
14+
--plot filename save plot to filename");
15+
}
16+
17+
plot.filename = args.options$plot;
18+
19+
dat = read.csv(file('stdin'));
20+
dat = data.frame(dat);
21+
dat$nameTwoLines = paste0(dat$filename, '\n', dat$configuration);
22+
dat$name = paste0(dat$filename, dat$configuration);
23+
24+
# Create a box plot
25+
if (!is.null(plot.filename)) {
26+
p = ggplot(data=dat);
27+
p = p + geom_boxplot(aes(x=nameTwoLines, y=rate, fill=binary));
28+
p = p + ylab("rate of operations (higher is better)");
29+
p = p + xlab("benchmark");
30+
p = p + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5));
31+
ggsave(plot.filename, p);
32+
}
33+
34+
# Print a table with results
35+
statistics = ddply(dat, "name", function(subdat) {
36+
# Perform a statistical test to see if there actually is a difference in
37+
# performance.
38+
w = t.test(rate ~ binary, data=subdat);
39+
40+
# Calculate improvement for the "new" binary compared with the "old" binary
41+
new_mu = mean(subset(subdat, binary == "new")$rate);
42+
old_mu = mean(subset(subdat, binary == "old")$rate);
43+
improvement = sprintf("%.2f %%", ((new_mu - old_mu) / old_mu * 100));
44+
45+
# Add user friendly stars to the table. There should be at least one star
46+
# before you can say that there is an improvement.
47+
significant = '';
48+
if (w$p.value < 0.001) {
49+
significant = '***';
50+
} else if (w$p.value < 0.01) {
51+
significant = '**';
52+
} else if (w$p.value < 0.05) {
53+
significant = '*';
54+
}
55+
56+
r = list(
57+
improvement = improvement,
58+
significant = significant,
59+
p.value = w$p.value
60+
);
61+
return(data.frame(r));
62+
});
63+
64+
65+
# Set the benchmark names as the row.names to left align them in the print
66+
row.names(statistics) = statistics$name;
67+
statistics$name = NULL;
68+
69+
options(width = 200);
70+
print(statistics);

benchmark/compare.js

+67-162
Original file line numberDiff line numberDiff line change
@@ -1,181 +1,86 @@
11
'use strict';
2-
var usage = 'node benchmark/compare.js ' +
3-
'<node-binary1> <node-binary2> ' +
4-
'[--html] [--red|-r] [--green|-g] ' +
5-
'[-- <type> [testFilter]]';
62

7-
var show = 'both';
8-
var nodes = [];
9-
var html = false;
10-
var benchmarks;
3+
const fork = require('child_process').fork;
4+
const path = require('path');
5+
const CLI = require('./_cli.js');
6+
7+
//
8+
// Parse arguments
9+
//
10+
const cli = CLI(`usage: ./node compare.js [options] [--] <category> ...
11+
Run each benchmark in the <category> directory many times using two diffrent
12+
node versions. More than one <category> directory can be specified.
13+
The output is formatted as csv, which can be processed using for
14+
example 'compare.R'.
15+
16+
--new ./new-node-binary new node binary (required)
17+
--old ./old-node-binary old node binary (required)
18+
--runs 30 number of samples
19+
--filter pattern string to filter benchmark scripts
20+
--set variable=value set benchmark variable (can be repeated)
21+
`, {
22+
arrayArgs: ['set']
23+
});
24+
25+
if (!cli.optional.new || !cli.optional.old) {
26+
cli.abort(cli.usage);
27+
return;
28+
}
1129

12-
for (var i = 2; i < process.argv.length; i++) {
13-
var arg = process.argv[i];
14-
switch (arg) {
15-
case '--red': case '-r':
16-
show = show === 'green' ? 'both' : 'red';
17-
break;
18-
case '--green': case '-g':
19-
show = show === 'red' ? 'both' : 'green';
20-
break;
21-
case '--html':
22-
html = true;
23-
break;
24-
case '-h': case '-?': case '--help':
25-
console.log(usage);
26-
process.exit(0);
27-
break;
28-
case '--':
29-
benchmarks = [];
30-
break;
31-
default:
32-
if (Array.isArray(benchmarks))
33-
benchmarks.push(arg);
34-
else
35-
nodes.push(arg);
36-
break;
37-
}
30+
const binaries = ['old', 'new'];
31+
const runs = cli.optional.runs ? parseInt(cli.optional.runs, 10) : 30;
32+
const benchmarks = cli.benchmarks();
33+
34+
if (benchmarks.length === 0) {
35+
console.error('no benchmarks found');
36+
process.exit(1);
3837
}
3938

40-
var start, green, red, reset, end;
41-
if (!html) {
42-
start = '';
43-
green = '\u001b[1;32m';
44-
red = '\u001b[1;31m';
45-
reset = '\u001b[m';
46-
end = '';
47-
} else {
48-
start = '<pre style="background-color:#333;color:#eee">';
49-
green = '<span style="background-color:#0f0;color:#000">';
50-
red = '<span style="background-color:#f00;color:#fff">';
51-
reset = '</span>';
52-
end = '</pre>';
39+
// Create queue from the benchmarks list such that both node versions are tested
40+
// `runs` amount of times each.
41+
const queue = [];
42+
for (let iter = 0; iter < runs; iter++) {
43+
for (const filename of benchmarks) {
44+
for (const binary of binaries) {
45+
queue.push({ binary, filename, iter });
46+
}
47+
}
5348
}
5449

55-
var runBench = process.env.NODE_BENCH || 'bench';
50+
// Print csv header
51+
console.log('"binary", "filename", "configuration", "rate", "time"');
5652

57-
if (nodes.length !== 2)
58-
return console.error('usage:\n %s', usage);
53+
(function recursive(i) {
54+
const job = queue[i];
5955

60-
var spawn = require('child_process').spawn;
61-
var results = {};
62-
var toggle = 1;
63-
var r = (+process.env.NODE_BENCH_RUNS || 1) * 2;
56+
const child = fork(path.resolve(__dirname, job.filename), cli.optional.set, {
57+
execPath: cli.optional[job.binary]
58+
});
6459

65-
run();
66-
function run() {
67-
if (--r < 0)
68-
return compare();
69-
toggle = ++toggle % 2;
60+
child.on('message', function(data) {
61+
// Construct configuration string, " A=a, B=b, ..."
62+
let conf = '';
63+
for (const key of Object.keys(data.conf)) {
64+
conf += ' ' + key + '=' + JSON.stringify(data.conf[key]);
65+
}
66+
conf = conf.slice(1);
7067

71-
var node = nodes[toggle];
72-
console.error('running %s', node);
73-
var env = {};
74-
for (var i in process.env)
75-
env[i] = process.env[i];
76-
env.NODE = node;
68+
// Escape quotes (") for correct csv formatting
69+
conf = conf.replace(/"/g, '""');
7770

78-
var out = '';
79-
var child;
80-
if (Array.isArray(benchmarks) && benchmarks.length) {
81-
child = spawn(
82-
node,
83-
['benchmark/run.js'].concat(benchmarks),
84-
{ env: env }
85-
);
86-
} else {
87-
child = spawn('make', [runBench], { env: env });
88-
}
89-
child.stdout.setEncoding('utf8');
90-
child.stdout.on('data', function(c) {
91-
out += c;
71+
console.log(`"${job.binary}", "${job.filename}", "${conf}", ` +
72+
`${data.rate}, ${data.time}`);
9273
});
9374

94-
child.stderr.pipe(process.stderr);
95-
96-
child.on('close', function(code) {
75+
child.once('close', function(code) {
9776
if (code) {
98-
console.error('%s exited with code=%d', node, code);
9977
process.exit(code);
100-
} else {
101-
out.trim().split(/\r?\n/).forEach(function(line) {
102-
line = line.trim();
103-
if (!line)
104-
return;
105-
106-
var s = line.split(':');
107-
var num = +s.pop();
108-
if (!num && num !== 0)
109-
return;
110-
111-
line = s.join(':');
112-
var res = results[line] = results[line] || {};
113-
res[node] = res[node] || [];
114-
res[node].push(num);
115-
});
116-
117-
run();
118-
}
119-
});
120-
}
121-
122-
function compare() {
123-
// each result is an object with {"foo.js arg=bar":12345,...}
124-
// compare each thing, and show which node did the best.
125-
// node[0] is shown in green, node[1] shown in red.
126-
var maxLen = -Infinity;
127-
var util = require('util');
128-
console.log(start);
129-
130-
Object.keys(results).map(function(bench) {
131-
var res = results[bench];
132-
var n0 = avg(res[nodes[0]]);
133-
var n1 = avg(res[nodes[1]]);
134-
135-
var pct = ((n0 - n1) / n1 * 100).toFixed(2);
136-
137-
var g = n0 > n1 ? green : '';
138-
var r = n0 > n1 ? '' : red;
139-
var c = r || g;
140-
141-
if (show === 'green' && !g || show === 'red' && !r)
14278
return;
79+
}
14380

144-
var r0 = util.format(
145-
'%s%s: %d%s',
146-
g,
147-
nodes[0],
148-
n0.toPrecision(5), g ? reset : ''
149-
);
150-
var r1 = util.format(
151-
'%s%s: %d%s',
152-
r,
153-
nodes[1],
154-
n1.toPrecision(5), r ? reset : ''
155-
);
156-
pct = c + pct + '%' + reset;
157-
var l = util.format('%s: %s %s', bench, r0, r1);
158-
maxLen = Math.max(l.length + pct.length, maxLen);
159-
return [l, pct];
160-
}).filter(function(l) {
161-
return l;
162-
}).forEach(function(line) {
163-
var l = line[0];
164-
var pct = line[1];
165-
var dotLen = maxLen - l.length - pct.length + 2;
166-
var dots = ' ' + new Array(Math.max(0, dotLen)).join('.') + ' ';
167-
console.log(l + dots + pct);
81+
// If there are more benchmarks execute the next
82+
if (i + 1 < queue.length) {
83+
recursive(i + 1);
84+
}
16885
});
169-
console.log(end);
170-
}
171-
172-
function avg(list) {
173-
if (list.length >= 3) {
174-
list = list.sort();
175-
var q = Math.floor(list.length / 4) || 1;
176-
list = list.slice(q, -q);
177-
}
178-
return list.reduce(function(a, b) {
179-
return a + b;
180-
}, 0) / list.length;
181-
}
86+
})(0);

0 commit comments

Comments
 (0)