ew_opt

ihsgnef · ihsgnef · commit d4cd03d82738 · 2018-10-24T16:33:36.000-04:00
diff --git a/gulpfile.js b/gulpfile.js
@@ -17,7 +17,7 @@ var build_dir = 'qanta-leaderboard/' // good to have this be the same as the rep
 
 var rankEntries = function (entries) {
   entries.sort(function (a, b) {
-    var curveDiff = Math.sign(b.curve - a.curve)
+    var curveDiff = Math.sign(b.ew - a.ew)
     return curveDiff
   })
 
@@ -28,7 +28,7 @@ var rankEntries = function (entries) {
     } else {
       var prevEntry = entries[i - 1]
       var rank = prevEntry.rank
-      if (entry.curve < prevEntry.curve) rank++
+      if (entry.ew < prevEntry.ew) rank++
       entry.rank = rank
     }
   }
@@ -58,13 +58,14 @@ var parseCompEntries = function (comp_file) {
         entry.link = description.substr(description.lastIndexOf('http')).trim()
       }
       entry.date = o_entry.submission.created
-      entry.sent1_acc = parseFloat(o_entry.scores.sent1_acc)
-      entry.eoq_acc = parseFloat(o_entry.scores.eoq_acc)
-      entry.curve = parseFloat(o_entry.scores.curve)
-      if (!(entry.sent1_acc >= 0)) throw 'Score invalid'
-      if (entry.sent1_acc < 0) throw 'Score too low'
-      if (!(entry.eoq_acc >= 0)) throw 'Score invalid'
-      if (entry.eoq_acc < 0) throw 'Score too low'
+      entry.first_acc = parseFloat(o_entry.scores.first_acc)
+      entry.end_acc = parseFloat(o_entry.scores.end_acc)
+      entry.ew = parseFloat(o_entry.scores.ew)
+      entry.ew_opt = parseFloat(o_entry.scores.ew_opt)
+      if (!(entry.first_acc >= 0)) throw 'Score invalid'
+      if (entry.first_acc < 0) throw 'Score too low'
+      if (!(entry.end_acc >= 0)) throw 'Score invalid'
+      if (entry.end_acc < 0) throw 'Score too low'
       if (entry.model_name === '') {
         entry.model_name = 'Unnamed submission by ' + entry.user
       }
@@ -94,10 +95,11 @@ var parseEntries = function (htmlStr) {
       entry.link = entry.description.substr(entry.description.lastIndexOf('http')).trim()
     }
     delete entry.description
-    entry.sent1_acc = parseFloat(cells.eq(3).text())
-    entry.eoq_acc = parseFloat(cells.eq(4).text())
-    entry.curve = parseFloat(cells.eq(5).text())
     entry.date = cells.eq(2).text().trim()
+    entry.first_acc = parseFloat(cells.eq(3).text())
+    entry.end_acc = parseFloat(cells.eq(4).text())
+    entry.ew = parseFloat(cells.eq(5).text())
+    entry.ew_opt = parseFloat(cells.eq(6).text())
     entries.push(entry)
   })
   entries = rankEntries(entries)
diff --git a/out-qanta.json b/out-qanta.json
@@ -124,9 +124,10 @@
                 "uuid": "0xfbd0a104e93c4277998c6af8d377c3e3"
             },
             "scores": {
-                "curve": 0.0025263154379570945,
-                "eoq_acc": 0.4685672514619883,
-                "sent1_acc": 0.0533625730994152
+                "ew": 0.0025263154379570945,
+                "ew_opt": 0.66666,
+                "end_acc": 0.4685672514619883,
+                "first_acc": 0.0533625730994152
             },
             "submission": {
                 "created": 1540396849,
@@ -139,4 +140,4 @@
         }
     ],
     "updated": 1540407472.607516
-}
+}
diff --git a/views/index.pug b/views/index.pug
@@ -19,10 +19,12 @@ mixin squad_2_model_display(group, is_test)
       th Model
       th first_acc
       th end_acc
-      th expected wins
-    - var largest_sent1_acc = Math.max.apply(null, group.map(function (model) { return model.sent1_acc; }))
-    - var largest_eoq_acc = Math.max.apply(null, group.map(function (model) { return model.eoq_acc; }))
-    - var largest_curve = Math.max.apply(null, group.map(function (model) { return model.curve; }))
+      th EW
+      th EW_OPT
+    - var largest_first_acc = Math.max.apply(null, group.map(function (model) { return model.first_acc; }))
+    - var largest_end_acc = Math.max.apply(null, group.map(function (model) { return model.end_acc; }))
+    - var largest_ew = Math.max.apply(null, group.map(function (model) { return model.ew; }))
+    - var largest_ew_opt = Math.max.apply(null, group.map(function (model) { return model.ew_opt; }))
     each model in group
       tr
         if is_test
@@ -35,20 +37,25 @@ mixin squad_2_model_display(group, is_test)
           if model.link
             a.link(href=model.link) #{model.link}
         td
-          if model.sent1_acc == largest_sent1_acc
-            b #{model.sent1_acc.toPrecision(5)}
+          if model.first_acc == largest_first_acc
+            b #{model.first_acc.toPrecision(3)}
           else
-            | #{model.sent1_acc.toPrecision(5)}
+            | #{model.first_acc.toPrecision(3)}
         td
-          if model.eoq_acc == largest_eoq_acc
-            b #{model.eoq_acc.toPrecision(5)}
+          if model.end_acc == largest_end_acc
+            b #{model.end_acc.toPrecision(3)}
           else
-            | #{model.eoq_acc.toPrecision(5)}
+            | #{model.end_acc.toPrecision(3)}
         td
-          if model.curve == largest_curve
-            b #{model.curve.toPrecision(5)}
+          if model.ew == largest_ew
+            b #{model.ew.toPrecision(3)}
           else
-            | #{model.curve.toPrecision(5)}
+            | #{model.ew.toPrecision(3)}
+        td
+          if model.ew_opt == largest_ew_opt
+            b #{model.ew_opt.toPrecision(3)}
+          else
+            | #{model.ew_opt.toPrecision(3)}
 block content
   .cover#contentCover
     .container
@@ -128,6 +135,7 @@ block content
               .infoHeadline
                 h2 Leaderboard
               p
-              | We evaluate each system with three metrics: accuracy at the end of the first sentence (first_acc) and at the end of the question (end_acc), and the 
-              a(href="https://worksheets.codalab.org/worksheets/0xfb3d16165dd24f69bb1ba9420fca9212/") expected wins.
+              | We evaluate each system with four metrics: accuracy at the end of the first sentence (first_acc), accuracy at the end of the question (end_acc), and two new metrics: 
+              a(href="https://worksheets.codalab.org/worksheets/0xfb3d16165dd24f69bb1ba9420fca9212/") expected wins 
+              |  with system buzzer (EW) and with optimal buzzer (EW_OPT).
               +squad_2_model_display(test2, true)