Skip to content

Commit d4cd03d

Browse files
author
ihsgnef
committed Oct 24, 2018
ew_opt
1 parent 945d9b3 commit d4cd03d

File tree

3 files changed

+42
-31
lines changed

3 files changed

+42
-31
lines changed
 

‎gulpfile.js

+14-12
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ var build_dir = 'qanta-leaderboard/' // good to have this be the same as the rep
1717

1818
var rankEntries = function (entries) {
1919
entries.sort(function (a, b) {
20-
var curveDiff = Math.sign(b.curve - a.curve)
20+
var curveDiff = Math.sign(b.ew - a.ew)
2121
return curveDiff
2222
})
2323

@@ -28,7 +28,7 @@ var rankEntries = function (entries) {
2828
} else {
2929
var prevEntry = entries[i - 1]
3030
var rank = prevEntry.rank
31-
if (entry.curve < prevEntry.curve) rank++
31+
if (entry.ew < prevEntry.ew) rank++
3232
entry.rank = rank
3333
}
3434
}
@@ -58,13 +58,14 @@ var parseCompEntries = function (comp_file) {
5858
entry.link = description.substr(description.lastIndexOf('http')).trim()
5959
}
6060
entry.date = o_entry.submission.created
61-
entry.sent1_acc = parseFloat(o_entry.scores.sent1_acc)
62-
entry.eoq_acc = parseFloat(o_entry.scores.eoq_acc)
63-
entry.curve = parseFloat(o_entry.scores.curve)
64-
if (!(entry.sent1_acc >= 0)) throw 'Score invalid'
65-
if (entry.sent1_acc < 0) throw 'Score too low'
66-
if (!(entry.eoq_acc >= 0)) throw 'Score invalid'
67-
if (entry.eoq_acc < 0) throw 'Score too low'
61+
entry.first_acc = parseFloat(o_entry.scores.first_acc)
62+
entry.end_acc = parseFloat(o_entry.scores.end_acc)
63+
entry.ew = parseFloat(o_entry.scores.ew)
64+
entry.ew_opt = parseFloat(o_entry.scores.ew_opt)
65+
if (!(entry.first_acc >= 0)) throw 'Score invalid'
66+
if (entry.first_acc < 0) throw 'Score too low'
67+
if (!(entry.end_acc >= 0)) throw 'Score invalid'
68+
if (entry.end_acc < 0) throw 'Score too low'
6869
if (entry.model_name === '') {
6970
entry.model_name = 'Unnamed submission by ' + entry.user
7071
}
@@ -94,10 +95,11 @@ var parseEntries = function (htmlStr) {
9495
entry.link = entry.description.substr(entry.description.lastIndexOf('http')).trim()
9596
}
9697
delete entry.description
97-
entry.sent1_acc = parseFloat(cells.eq(3).text())
98-
entry.eoq_acc = parseFloat(cells.eq(4).text())
99-
entry.curve = parseFloat(cells.eq(5).text())
10098
entry.date = cells.eq(2).text().trim()
99+
entry.first_acc = parseFloat(cells.eq(3).text())
100+
entry.end_acc = parseFloat(cells.eq(4).text())
101+
entry.ew = parseFloat(cells.eq(5).text())
102+
entry.ew_opt = parseFloat(cells.eq(6).text())
101103
entries.push(entry)
102104
})
103105
entries = rankEntries(entries)

‎out-qanta.json

+5-4
Original file line number | Diff line number | Diff line change
@@ -124,9 +124,10 @@
124124
"uuid": "0xfbd0a104e93c4277998c6af8d377c3e3"
125125
},
126126
"scores": {
127-
"curve": 0.0025263154379570945,
128-
"eoq_acc": 0.4685672514619883,
129-
"sent1_acc": 0.0533625730994152
127+
"ew": 0.0025263154379570945,
128+
"ew_opt": 0.66666,
129+
"end_acc": 0.4685672514619883,
130+
"first_acc": 0.0533625730994152
130131
},
131132
"submission": {
132133
"created": 1540396849,
@@ -139,4 +140,4 @@
139140
}
140141
],
141142
"updated": 1540407472.607516
142-
}
143+
}

‎views/index.pug

+23-15
Original file line number | Diff line number | Diff line change
@@ -19,10 +19,12 @@ mixin squad_2_model_display(group, is_test)
1919
th Model
2020
th first_acc
2121
th end_acc
22-
th expected wins
23-
- var largest_sent1_acc = Math.max.apply(null, group.map(function (model) { return model.sent1_acc; }))
24-
- var largest_eoq_acc = Math.max.apply(null, group.map(function (model) { return model.eoq_acc; }))
25-
- var largest_curve = Math.max.apply(null, group.map(function (model) { return model.curve; }))
22+
th EW
23+
th EW_OPT
24+
- var largest_first_acc = Math.max.apply(null, group.map(function (model) { return model.first_acc; }))
25+
- var largest_end_acc = Math.max.apply(null, group.map(function (model) { return model.end_acc; }))
26+
- var largest_ew = Math.max.apply(null, group.map(function (model) { return model.ew; }))
27+
- var largest_ew_opt = Math.max.apply(null, group.map(function (model) { return model.ew_opt; }))
2628
each model in group
2729
tr
2830
if is_test
@@ -35,20 +37,25 @@ mixin squad_2_model_display(group, is_test)
3537
if model.link
3638
a.link(href=model.link) #{model.link}
3739
td
38-
if model.sent1_acc == largest_sent1_acc
39-
b #{model.sent1_acc.toPrecision(5)}
40+
if model.first_acc == largest_first_acc
41+
b #{model.first_acc.toPrecision(3)}
4042
else
41-
| #{model.sent1_acc.toPrecision(5)}
43+
| #{model.first_acc.toPrecision(3)}
4244
td
43-
if model.eoq_acc == largest_eoq_acc
44-
b #{model.eoq_acc.toPrecision(5)}
45+
if model.end_acc == largest_end_acc
46+
b #{model.end_acc.toPrecision(3)}
4547
else
46-
| #{model.eoq_acc.toPrecision(5)}
48+
| #{model.end_acc.toPrecision(3)}
4749
td
48-
if model.curve == largest_curve
49-
b #{model.curve.toPrecision(5)}
50+
if model.ew == largest_ew
51+
b #{model.ew.toPrecision(3)}
5052
else
51-
| #{model.curve.toPrecision(5)}
53+
| #{model.ew.toPrecision(3)}
54+
td
55+
if model.ew_opt == largest_ew_opt
56+
b #{model.ew_opt.toPrecision(3)}
57+
else
58+
| #{model.ew_opt.toPrecision(3)}
5259
block content
5360
.cover#contentCover
5461
.container
@@ -128,6 +135,7 @@ block content
128135
.infoHeadline
129136
h2 Leaderboard
130137
p
131-
| We evaluate each system with three metrics: accuracy at the end of the first sentence (first_acc) and at the end of the question (end_acc), and the
132-
a(href="https://worksheets.codalab.org/worksheets/0xfb3d16165dd24f69bb1ba9420fca9212/") expected wins.
138+
| We evaluate each system with three metrics: accuracy at the end of the first sentence (first_acc) and at the end of the question (end_acc), and two new metrics:
139+
a(href="https://worksheets.codalab.org/worksheets/0xfb3d16165dd24f69bb1ba9420fca9212/") expected wins
140+
| with system buzzer (EW) and with optimal buzzer (EW_OPT).
133141
+squad_2_model_display(test2, true)

0 commit comments

Comments
 (0)