@@ -35,22 +35,22 @@ if (!is.null(plot.filename)) {
   ggsave(plot.filename, p);
 }
 
-# computes the shared standard error, as used in the welch t-test
+# Computes the shared standard error, as used in Welch's t-test.
 welch.sd = function(old.rate, new.rate) {
   old.se.squared = var(old.rate) / length(old.rate)
   new.se.squared = var(new.rate) / length(new.rate)
   return(sqrt(old.se.squared + new.se.squared))
 }
 
-# calculate the improvement confidence interval. The improvement is calculated
+# Calculate the improvement confidence interval. The improvement is calculated
 # by dividing by old.mu and not new.mu, because old.mu is what the mean
 # improvement is calculated relative to.
 confidence.interval = function(shared.se, old.mu, w, risk) {
   interval = qt(1 - (risk / 2), w$parameter) * shared.se;
   return(sprintf("±%.2f%%", (interval / old.mu) * 100))
 }
 
-# Print a table with results
+# Calculate the statistics table.
 statistics = ddply(dat, "name", function(subdat) {
   old.rate = subset(subdat, binary == "old")$rate;
   new.rate = subset(subdat, binary == "new")$rate;
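
# Aside: a minimal, self-contained sketch (sample data invented for
# illustration) of what welch.sd and confidence.interval compute. The shared
# standard error is sqrt(var(old)/n_old + var(new)/n_new), and the interval
# half-width scales it by the t-quantile at the Welch-Satterthwaite degrees
# of freedom that t.test() reports in its "parameter" field.
old.sample = c(100, 102, 98, 101, 99)
new.sample = c(110, 108, 111, 109, 112)
shared.se = sqrt(var(old.sample) / length(old.sample) +
                 var(new.sample) / length(new.sample))
w = t.test(old.sample, new.sample)  # Welch's t-test is R's default here
interval = qt(1 - (0.05 / 2), w$parameter) * shared.se
sprintf("±%.2f%%", (interval / mean(old.sample)) * 100)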
@@ -68,14 +68,14 @@ statistics = ddply(dat, "name", function(subdat) {
     "(***)" = "NA"
   );
 
-  # Check if there is enough data to calculate the calculate the p-value
+  # Check if there is enough data to calculate the p-value.
   if (length(old.rate) > 1 && length(new.rate) > 1) {
-    # Perform a statistics test to see of there actually is a difference in
+    # Perform a statistical test to see if there actually is a difference in
     # performance.
     w = t.test(rate ~ binary, data = subdat);
     shared.se = welch.sd(old.rate, new.rate)
 
-    # Add user friendly stars to the table. There should be at least one star
+    # Add user-friendly stars to the table. There should be at least one star
     # before you can say that there is an improvement.
     confidence = '';
     if (w$p.value < 0.001) {
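
# Aside: a sketch of the star scheme implied by the threshold above and by
# the risk levels named in the summary message (0.05, 0.01, 0.001); the
# elided branches are not shown in this hunk, and the data here are invented.
w = t.test(c(100, 102, 98, 101), c(110, 108, 111, 109))
confidence = '';
if (w$p.value < 0.001) {
  confidence = '***';
} else if (w$p.value < 0.01) {
  confidence = '**';
} else if (w$p.value < 0.05) {
  confidence = '*';
}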
@@ -99,7 +99,7 @@ statistics = ddply(dat, "name", function(subdat) {
 });
 
 
-# Set the benchmark names as the row.names to left align them in the print
+# Set the benchmark names as the row.names to left align them in the print.
 row.names(statistics) = statistics$name;
 statistics$name = NULL;
 
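
# Aside: print.data.frame left-aligns row names but right-aligns character
# columns, so moving the benchmark name into row.names keeps the printed
# table easy to scan. A tiny sketch with invented data:
statistics.demo = data.frame(name = c("bench-a", "bench-b"),
                             improvement = c("12.3%", "-0.4%"))
row.names(statistics.demo) = statistics.demo$name
statistics.demo$name = NULL
print(statistics.demo)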
@@ -108,7 +108,7 @@ print(statistics);
 cat("\n")
 cat(sprintf(
 "Be aware that when doing many comparisons the risk of a false-positive
-result increases. In this case there are %d comparisons, you can thus
+result increases. In this case, there are %d comparisons, you can thus
 expect the following amount of false-positive results:
 %.2f false positives, when considering a 5%% risk acceptance (*, **, ***),
 %.2f false positives, when considering a 1%% risk acceptance (**, ***),
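
# Aside: the false-positive arithmetic behind the message above. With m
# comparisons, the expected number of false positives at a given risk level
# is simply m * risk; m = 10 is a hypothetical comparison count.
m = 10
sprintf("%.2f false positives, at a 5%% risk acceptance", m * 0.05)
sprintf("%.2f false positives, at a 1%% risk acceptance", m * 0.01)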