From 851c419b47ddb588c012ac9b65bb1533d44b366f Mon Sep 17 00:00:00 2001
From: Brian White <mscdex@mscdex.net>
Date: Wed, 8 Feb 2017 03:23:03 -0500
Subject: [PATCH 1/2] querystring: fix empty pairs and optimize parse()

This commit fixes handling of empty pairs that occur before the end
of the query string so that they are also ignored.

Additionally, some optimizations have been made, including:

* Avoid unnecessary code execution where possible
* Use a lookup table when checking for hex characters
* Avoid forced decoding when '+' characters are encountered and we
are using the default decoder

Fixes: https://github.com/nodejs/node/issues/10454
---
 benchmark/querystring/querystring-parse.js |  40 ++--
 lib/querystring.js                         | 246 +++++++++++----------
 test/parallel/test-querystring.js          |   9 +
 3 files changed, 166 insertions(+), 129 deletions(-)

diff --git a/benchmark/querystring/querystring-parse.js b/benchmark/querystring/querystring-parse.js
index fe14d95a53f0a0..0ed677751280c0 100644
--- a/benchmark/querystring/querystring-parse.js
+++ b/benchmark/querystring/querystring-parse.js
@@ -12,7 +12,9 @@ var inputs = {
   multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
   multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
                   'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
-  manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
+  manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
+  manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',
+  altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
 };
 
 var bench = common.createBenchmark(main, {
@@ -20,34 +22,38 @@ var bench = common.createBenchmark(main, {
   n: [1e6],
 });
 
+// A deopt followed by a reopt of main() can happen right when the timed loop
+// starts, which seems to have a noticeable effect on the benchmark results.
+// So we explicitly disable optimization of main() to avoid this potential
+// issue.
+v8.setFlagsFromString('--allow_natives_syntax');
+eval('%NeverOptimizeFunction(main)');
+
 function main(conf) {
   var type = conf.type;
   var n = conf.n | 0;
   var input = inputs[type];
+  var i;
 
-  // Force-optimize querystring.parse() so that the benchmark doesn't get
-  // disrupted by the optimizer kicking in halfway through.
-  v8.setFlagsFromString('--allow_natives_syntax');
-  if (type !== 'multicharsep') {
-    querystring.parse(input);
-    eval('%OptimizeFunctionOnNextCall(querystring.parse)');
-    querystring.parse(input);
-  } else {
-    querystring.parse(input, '&&&&&&&&&&');
-    eval('%OptimizeFunctionOnNextCall(querystring.parse)');
-    querystring.parse(input, '&&&&&&&&&&');
-  }
+  // Note: we do *not* use OptimizeFunctionOnNextCall() here because currently
+  // it causes a deopt followed by a reopt, which could make its way into the
+  // timed loop. Instead, just execute the function a "sufficient" number of
+  // times before the timed loop to ensure the function is optimized just once.
+  if (type === 'multicharsep') {
+    for (i = 0; i < n; i += 1)
+      querystring.parse(input, '&&&&&&&&&&');
 
-  var i;
-  if (type !== 'multicharsep') {
     bench.start();
     for (i = 0; i < n; i += 1)
-      querystring.parse(input);
+      querystring.parse(input, '&&&&&&&&&&');
     bench.end(n);
   } else {
+    for (i = 0; i < n; i += 1)
+      querystring.parse(input);
+
     bench.start();
     for (i = 0; i < n; i += 1)
-      querystring.parse(input, '&&&&&&&&&&');
+      querystring.parse(input);
     bench.end(n);
   }
 }
diff --git a/lib/querystring.js b/lib/querystring.js
index 9b4ca27640aa71..32ba30712090a4 100644
--- a/lib/querystring.js
+++ b/lib/querystring.js
@@ -250,6 +250,25 @@ function stringify(obj, sep, eq, options) {
   return '';
 }
 
+const isHexTable = [
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63
+  0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95
+  0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ...
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  // ... 256
+];
+
 function charCodes(str) {
   if (str.length === 0) return [];
   if (str.length === 1) return [str.charCodeAt(0)];
@@ -292,7 +311,6 @@ function parse(qs, sep, eq, options) {
   const customDecode = (decode !== qsUnescape);
 
   const keys = [];
-  var posIdx = 0;
   var lastPos = 0;
   var sepIdx = 0;
   var eqIdx = 0;
@@ -300,6 +318,7 @@ function parse(qs, sep, eq, options) {
   var value = '';
   var keyEncoded = customDecode;
   var valEncoded = customDecode;
+  const plusChar = (customDecode ? '%20' : ' ');
   var encodeCheck = 0;
   for (var i = 0; i < qs.length; ++i) {
     const code = qs.charCodeAt(i);
@@ -310,142 +329,145 @@ function parse(qs, sep, eq, options) {
         // Key/value pair separator match!
         const end = i - sepIdx + 1;
         if (eqIdx < eqLen) {
-          // If we didn't find the key/value separator, treat the substring as
-          // part of the key instead of the value
-          if (lastPos < end)
+          // We didn't find the (entire) key/value separator
+          if (lastPos < end) {
+            // Treat the substring as part of the key instead of the value
             key += qs.slice(lastPos, end);
-        } else if (lastPos < end)
-          value += qs.slice(lastPos, end);
-        if (keyEncoded)
-          key = decodeStr(key, decode);
-        if (valEncoded)
-          value = decodeStr(value, decode);
-
-        if (key || value || lastPos - posIdx > sepLen || i === 0) {
-          // Use a key array lookup instead of using hasOwnProperty(), which is
-          // slower
-          if (keys.indexOf(key) === -1) {
-            obj[key] = value;
-            keys[keys.length] = key;
+            if (keyEncoded)
+              key = decodeStr(key, decode);
           } else {
-            const curValue = obj[key] || '';
-            // A simple Array-specific property check is enough here to
-            // distinguish from a string value and is faster and still safe
-            // since we are generating all of the values being assigned.
-            if (curValue.pop)
-              curValue[curValue.length] = value;
-            else if (curValue)
-              obj[key] = [curValue, value];
+            // We saw an empty substring between separators
+            if (--pairs === 0)
+              return obj;
+            lastPos = i + 1;
+            sepIdx = eqIdx = 0;
+            continue;
+          }
+        } else {
+          if (lastPos < end) {
+            value += qs.slice(lastPos, end);
+            if (valEncoded)
+              value = decodeStr(value, decode);
           }
-        } else if (i === 1) {
-          // A pair with repeated sep could be added into obj in the first loop
-          // and it should be deleted
-          delete obj[key];
+          if (keyEncoded)
+            key = decodeStr(key, decode);
+        }
+
+        // Use a key array lookup instead of using hasOwnProperty(), which is
+        // slower
+        if (keys.indexOf(key) === -1) {
+          obj[key] = value;
+          keys[keys.length] = key;
+        } else {
+          const curValue = obj[key];
+          // A simple Array-specific property check is enough here to
+          // distinguish from a string value and is faster and still safe
+          // since we are generating all of the values being assigned.
+          if (curValue.pop)
+            curValue[curValue.length] = value;
+          else
+            obj[key] = [curValue, value];
         }
         if (--pairs === 0)
-          break;
+          return obj;
         keyEncoded = valEncoded = customDecode;
-        encodeCheck = 0;
         key = value = '';
-        posIdx = lastPos;
+        encodeCheck = 0;
         lastPos = i + 1;
         sepIdx = eqIdx = 0;
       }
-      continue;
     } else {
       sepIdx = 0;
-      if (!valEncoded) {
-        // Try to match an (valid) encoded byte (once) to minimize unnecessary
-        // calls to string decoding functions
-        if (code === 37/*%*/) {
-          encodeCheck = 1;
-        } else if (encodeCheck > 0 &&
-                   ((code >= 48/*0*/ && code <= 57/*9*/) ||
-                    (code >= 65/*A*/ && code <= 70/*F*/) ||
-                    (code >= 97/*a*/ && code <= 102/*f*/))) {
-          if (++encodeCheck === 3)
-            valEncoded = true;
+      // Try matching key/value separator (e.g. '=') if we haven't already
+      if (eqIdx < eqLen) {
+        if (code === eqCodes[eqIdx]) {
+          if (++eqIdx === eqLen) {
+            // Key/value separator match!
+            const end = i - eqIdx + 1;
+            if (lastPos < end)
+              key += qs.slice(lastPos, end);
+            encodeCheck = 0;
+            lastPos = i + 1;
+          }
+          continue;
         } else {
-          encodeCheck = 0;
+          eqIdx = 0;
+          if (!keyEncoded) {
+            // Try to match an (valid) encoded byte once to minimize unnecessary
+            // calls to string decoding functions
+            if (code === 37/*%*/) {
+              encodeCheck = 1;
+              continue;
+            } else if (encodeCheck > 0) {
+              // eslint-disable-next-line no-extra-boolean-cast
+              if (!!isHexTable[code]) {
+                if (++encodeCheck === 3)
+                  keyEncoded = true;
+                continue;
+              } else {
+                encodeCheck = 0;
+              }
+            }
+          }
         }
-      }
-    }
-
-    // Try matching key/value separator (e.g. '=') if we haven't already
-    if (eqIdx < eqLen) {
-      if (code === eqCodes[eqIdx]) {
-        if (++eqIdx === eqLen) {
-          // Key/value separator match!
-          const end = i - eqIdx + 1;
-          if (lastPos < end)
-            key += qs.slice(lastPos, end);
-          encodeCheck = 0;
+        if (code === 43/*+*/) {
+          if (lastPos < i)
+            key += qs.slice(lastPos, i);
+          key += plusChar;
           lastPos = i + 1;
+          continue;
         }
-        continue;
-      } else {
-        eqIdx = 0;
-        if (!keyEncoded) {
-          // Try to match an (valid) encoded byte once to minimize unnecessary
-          // calls to string decoding functions
-          if (code === 37/*%*/) {
-            encodeCheck = 1;
-          } else if (encodeCheck > 0 &&
-                     ((code >= 48/*0*/ && code <= 57/*9*/) ||
-                      (code >= 65/*A*/ && code <= 70/*F*/) ||
-                      (code >= 97/*a*/ && code <= 102/*f*/))) {
+      }
+      if (code === 43/*+*/) {
+        if (lastPos < i)
+          value += qs.slice(lastPos, i);
+        value += plusChar;
+        lastPos = i + 1;
+      } else if (!valEncoded) {
+        // Try to match an (valid) encoded byte (once) to minimize unnecessary
+        // calls to string decoding functions
+        if (code === 37/*%*/) {
+          encodeCheck = 1;
+        } else if (encodeCheck > 0) {
+          // eslint-disable-next-line no-extra-boolean-cast
+          if (!!isHexTable[code]) {
             if (++encodeCheck === 3)
-              keyEncoded = true;
+              valEncoded = true;
           } else {
             encodeCheck = 0;
           }
         }
       }
     }
-
-    if (code === 43/*+*/) {
-      if (eqIdx < eqLen) {
-        if (lastPos < i)
-          key += qs.slice(lastPos, i);
-        key += '%20';
-        keyEncoded = true;
-      } else {
-        if (lastPos < i)
-          value += qs.slice(lastPos, i);
-        value += '%20';
-        valEncoded = true;
-      }
-      lastPos = i + 1;
-    }
   }
 
-  // Check if we have leftover key or value data
-  if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
-    if (lastPos < qs.length) {
-      if (eqIdx < eqLen)
-        key += qs.slice(lastPos);
-      else if (sepIdx < sepLen)
-        value += qs.slice(lastPos);
-    }
-    if (keyEncoded)
-      key = decodeStr(key, decode);
-    if (valEncoded)
-      value = decodeStr(value, decode);
-    // Use a key array lookup instead of using hasOwnProperty(), which is
-    // slower
-    if (keys.indexOf(key) === -1) {
-      obj[key] = value;
-      keys[keys.length] = key;
-    } else {
-      const curValue = obj[key];
-      // A simple Array-specific property check is enough here to
-      // distinguish from a string value and is faster and still safe since
-      // we are generating all of the values being assigned.
-      if (curValue.pop)
-        curValue[curValue.length] = value;
-      else
-        obj[key] = [curValue, value];
-    }
+  // Deal with any leftover key or value data
+  if (lastPos < qs.length) {
+    if (eqIdx < eqLen)
+      key += qs.slice(lastPos);
+    else if (sepIdx < sepLen)
+      value += qs.slice(lastPos);
+  } else if (eqIdx === 0) {
+    // We ended on an empty substring
+    return obj;
+  }
+  if (keyEncoded)
+    key = decodeStr(key, decode);
+  if (valEncoded)
+    value = decodeStr(value, decode);
+  // Use a key array lookup instead of using hasOwnProperty(), which is slower
+  if (keys.indexOf(key) === -1) {
+    obj[key] = value;
+    keys[keys.length] = key;
+  } else {
+    const curValue = obj[key];
+    // A simple Array-specific property check is enough here to
+    // distinguish from a string value and is faster and still safe since
+    // we are generating all of the values being assigned.
+    if (curValue.pop)
+      curValue[curValue.length] = value;
+    else
+      obj[key] = [curValue, value];
   }
 
   return obj;
diff --git a/test/parallel/test-querystring.js b/test/parallel/test-querystring.js
index baa426094c77c6..04034377fea76d 100644
--- a/test/parallel/test-querystring.js
+++ b/test/parallel/test-querystring.js
@@ -59,6 +59,15 @@ const qsTestCases = [
   ['&&&&', '', {}],
   ['&=&', '=', { '': '' }],
   ['&=&=', '=&=', { '': [ '', '' ]}],
+  ['a&&b', 'a=&b=', { 'a': '', 'b': '' }],
+  ['a=a&&b=b', 'a=a&b=b', { 'a': 'a', 'b': 'b' }],
+  ['&a', 'a=', { 'a': '' }],
+  ['&=', '=', { '': '' }],
+  ['a&a&', 'a=&a=', { a: [ '', '' ] }],
+  ['a&a&a&', 'a=&a=&a=', { a: [ '', '', '' ] }],
+  ['a&a&a&a&', 'a=&a=&a=&a=', { a: [ '', '', '', '' ] }],
+  ['a=&a=value&a=', 'a=&a=value&a=', { a: [ '', 'value', '' ] }],
+  ['foo+bar=baz+quux', 'foo%20bar=baz%20quux', { 'foo bar': 'baz quux' }],
   [null, '', {}],
   [undefined, '', {}]
 ];

From 7b725078362b452029b3c8530db376c0586a98c4 Mon Sep 17 00:00:00 2001
From: Brian White <mscdex@mscdex.net>
Date: Wed, 8 Feb 2017 03:05:59 -0500
Subject: [PATCH 2/2] test: improve querystring.parse assertion messages

---
 test/parallel/test-querystring.js | 32 +++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/test/parallel/test-querystring.js b/test/parallel/test-querystring.js
index 04034377fea76d..3f397577dffe5c 100644
--- a/test/parallel/test-querystring.js
+++ b/test/parallel/test-querystring.js
@@ -1,6 +1,7 @@
 'use strict';
 require('../common');
 const assert = require('assert');
+const inspect = require('util').inspect;
 
 // test using assert
 const qs = require('querystring');
@@ -126,28 +127,43 @@ assert.strictEqual('918854443121279438895193',
                    qs.parse('id=918854443121279438895193').id);
 
 
-function check(actual, expected) {
+function check(actual, expected, input) {
   assert(!(actual instanceof Object));
-  assert.deepStrictEqual(Object.keys(actual).sort(),
-                         Object.keys(expected).sort());
-  Object.keys(expected).forEach(function(key) {
-    assert.deepStrictEqual(actual[key], expected[key]);
+  const actualKeys = Object.keys(actual).sort();
+  const expectedKeys = Object.keys(expected).sort();
+  let msg;
+  if (typeof input === 'string') {
+    msg = `Input: ${inspect(input)}\n` +
+          `Actual keys: ${inspect(actualKeys)}\n` +
+          `Expected keys: ${inspect(expectedKeys)}`;
+  }
+  assert.deepStrictEqual(actualKeys, expectedKeys, msg);
+  expectedKeys.forEach(function(key) {
+    if (typeof input === 'string') {
+      msg = `Input: ${inspect(input)}\n` +
+            `Key: ${inspect(key)}\n` +
+            `Actual value: ${inspect(actual[key])}\n` +
+            `Expected value: ${inspect(expected[key])}`;
+    } else {
+      msg = undefined;
+    }
+    assert.deepStrictEqual(actual[key], expected[key], msg);
   });
 }
 
 // test that the canonical qs is parsed properly.
 qsTestCases.forEach(function(testCase) {
-  check(qs.parse(testCase[0]), testCase[2]);
+  check(qs.parse(testCase[0]), testCase[2], testCase[0]);
 });
 
 // test that the colon test cases can do the same
 qsColonTestCases.forEach(function(testCase) {
-  check(qs.parse(testCase[0], ';', ':'), testCase[2]);
+  check(qs.parse(testCase[0], ';', ':'), testCase[2], testCase[0]);
 });
 
 // test the weird objects, that they get parsed properly
 qsWeirdObjects.forEach(function(testCase) {
-  check(qs.parse(testCase[1]), testCase[2]);
+  check(qs.parse(testCase[1]), testCase[2], testCase[1]);
 });
 
 qsNoMungeTestCases.forEach(function(testCase) {