Skip to content

Commit 96cec1e

Browse files
tniessen authored and targos committed
benchmark: improve explanations in R script
PR-URL: #36995 Reviewed-By: Antoine du Hamel <[email protected]> Reviewed-By: Colin Ihrig <[email protected]> Reviewed-By: Pooja D P <[email protected]> Reviewed-By: Jiawen Geng <[email protected]> Reviewed-By: Juan José Arboleda <[email protected]> Reviewed-By: Darshan Sen <[email protected]> Reviewed-By: James M Snell <[email protected]>
1 parent c4cab1f commit 96cec1e

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

benchmark/compare.R

+8-8
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,22 @@ if (!is.null(plot.filename)) {
3535
ggsave(plot.filename, p);
3636
}
3737

38-
# computes the shared standard error, as used in the welch t-test
38+
# Computes the shared standard error, as used in Welch's t-test.
3939
# Computes the shared standard error, as used in Welch's t-test.
welch.sd <- function(old.rate, new.rate) {
  # Squared standard error of the mean for one sample of rates.
  se.squared <- function(rate) {
    var(rate) / length(rate)
  }
  sqrt(se.squared(old.rate) + se.squared(new.rate))
}
4444

45-
# calculate the improvement confidence interval. The improvement is calculated
45+
# Calculate the improvement confidence interval. The improvement is calculated
4646
# by dividing by old.mu and not new.mu, because old.mu is what the mean
4747
# improvement is calculated relative to.
4848
# Calculate the improvement confidence interval. The improvement is calculated
# by dividing by old.mu and not new.mu, because old.mu is what the mean
# improvement is calculated relative to.
confidence.interval <- function(shared.se, old.mu, w, risk) {
  # Two-sided t-quantile at the requested risk level, with the Welch
  # degrees of freedom taken from the t.test result `w`.
  t.quantile <- qt(1 - (risk / 2), w$parameter)
  interval.pct <- (t.quantile * shared.se / old.mu) * 100
  sprintf("±%.2f%%", interval.pct)
}
5252

53-
# Print a table with results
53+
# Calculate the statistics table.
5454
statistics = ddply(dat, "name", function(subdat) {
5555
old.rate = subset(subdat, binary == "old")$rate;
5656
new.rate = subset(subdat, binary == "new")$rate;
@@ -68,14 +68,14 @@ statistics = ddply(dat, "name", function(subdat) {
6868
"(***)" = "NA"
6969
);
7070

71-
# Check if there is enough data to calculate the calculate the p-value
71+
# Check if there is enough data to calculate the p-value.
7272
if (length(old.rate) > 1 && length(new.rate) > 1) {
73-
# Perform a statistics test to see of there actually is a difference in
73+
# Perform a statistical test to see if there actually is a difference in
7474
# performance.
7575
w = t.test(rate ~ binary, data=subdat);
7676
shared.se = welch.sd(old.rate, new.rate)
7777

78-
# Add user friendly stars to the table. There should be at least one star
78+
# Add user-friendly stars to the table. There should be at least one star
7979
# before you can say that there is an improvement.
8080
confidence = '';
8181
if (w$p.value < 0.001) {
@@ -99,7 +99,7 @@ statistics = ddply(dat, "name", function(subdat) {
9999
});
100100

101101

102-
# Set the benchmark names as the row.names to left align them in the print
102+
# Set the benchmark names as the row.names to left align them in the print.
103103
row.names(statistics) = statistics$name;
104104
statistics$name = NULL;
105105

@@ -108,7 +108,7 @@ print(statistics);
108108
cat("\n")
109109
cat(sprintf(
110110
"Be aware that when doing many comparisons the risk of a false-positive
111-
result increases. In this case there are %d comparisons, you can thus
111+
result increases. In this case, there are %d comparisons, you can thus
112112
expect the following amount of false-positive results:
113113
%.2f false positives, when considering a 5%% risk acceptance (*, **, ***),
114114
%.2f false positives, when considering a 1%% risk acceptance (**, ***),

0 commit comments

Comments (0)