Skip to content

Commit ff785fd

Browse files
mscdexjasnell
authored andcommitted
querystring: fix empty pairs and optimize parse()
This commit fixes handling of empty pairs that occur before the end of the query string so that they are also ignored. Additionally, some optimizations have been made, including: * Avoid unnecessary code execution where possible * Use a lookup table when checking for hex characters * Avoid forced decoding when '+' characters are encountered and we are using the default decoder Fixes: #10454 PR-URL: #11234 Reviewed-By: James M Snell <[email protected]> Reviewed-By: Nicu Micleușanu <[email protected]>
1 parent d3be0f8 commit ff785fd

File tree

3 files changed

+166
-129
lines changed

3 files changed

+166
-129
lines changed

benchmark/querystring/querystring-parse.js

+23-17
Original file line numberDiff line numberDiff line change
@@ -12,42 +12,48 @@ var inputs = {
1212
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
1313
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
1414
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
15-
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
15+
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
16+
manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',
17+
altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
1618
};
1719

1820
var bench = common.createBenchmark(main, {
1921
type: Object.keys(inputs),
2022
n: [1e6],
2123
});
2224

25+
// A deopt followed by a reopt of main() can happen right when the timed loop
26+
// starts, which seems to have a noticeable effect on the benchmark results.
27+
// So we explicitly disable optimization of main() to avoid this potential
28+
// issue.
29+
v8.setFlagsFromString('--allow_natives_syntax');
30+
eval('%NeverOptimizeFunction(main)');
31+
2332
function main(conf) {
2433
var type = conf.type;
2534
var n = conf.n | 0;
2635
var input = inputs[type];
36+
var i;
2737

28-
// Force-optimize querystring.parse() so that the benchmark doesn't get
29-
// disrupted by the optimizer kicking in halfway through.
30-
v8.setFlagsFromString('--allow_natives_syntax');
31-
if (type !== 'multicharsep') {
32-
querystring.parse(input);
33-
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
34-
querystring.parse(input);
35-
} else {
36-
querystring.parse(input, '&&&&&&&&&&');
37-
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
38-
querystring.parse(input, '&&&&&&&&&&');
39-
}
38+
// Note: we do *not* use OptimizeFunctionOnNextCall() here because currently
39+
// it causes a deopt followed by a reopt, which could make its way into the
40+
// timed loop. Instead, just execute the function a "sufficient" number of
41+
// times before the timed loop to ensure the function is optimized just once.
42+
if (type === 'multicharsep') {
43+
for (i = 0; i < n; i += 1)
44+
querystring.parse(input, '&&&&&&&&&&');
4045

41-
var i;
42-
if (type !== 'multicharsep') {
4346
bench.start();
4447
for (i = 0; i < n; i += 1)
45-
querystring.parse(input);
48+
querystring.parse(input, '&&&&&&&&&&');
4649
bench.end(n);
4750
} else {
51+
for (i = 0; i < n; i += 1)
52+
querystring.parse(input);
53+
4854
bench.start();
4955
for (i = 0; i < n; i += 1)
50-
querystring.parse(input, '&&&&&&&&&&');
56+
querystring.parse(input);
5157
bench.end(n);
5258
}
5359
}

lib/querystring.js

+134-112
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,25 @@ function stringify(obj, sep, eq, options) {
250250
return '';
251251
}
252252

253+
const isHexTable = [
254+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15
255+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31
256+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47
257+
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63
258+
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79
259+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95
260+
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111
261+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127
262+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ...
263+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
264+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
265+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
266+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
267+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
268+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
269+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // ... 256
270+
];
271+
253272
function charCodes(str) {
254273
if (str.length === 0) return [];
255274
if (str.length === 1) return [str.charCodeAt(0)];
@@ -292,14 +311,14 @@ function parse(qs, sep, eq, options) {
292311
const customDecode = (decode !== qsUnescape);
293312

294313
const keys = [];
295-
var posIdx = 0;
296314
var lastPos = 0;
297315
var sepIdx = 0;
298316
var eqIdx = 0;
299317
var key = '';
300318
var value = '';
301319
var keyEncoded = customDecode;
302320
var valEncoded = customDecode;
321+
const plusChar = (customDecode ? '%20' : ' ');
303322
var encodeCheck = 0;
304323
for (var i = 0; i < qs.length; ++i) {
305324
const code = qs.charCodeAt(i);
@@ -310,142 +329,145 @@ function parse(qs, sep, eq, options) {
310329
// Key/value pair separator match!
311330
const end = i - sepIdx + 1;
312331
if (eqIdx < eqLen) {
313-
// If we didn't find the key/value separator, treat the substring as
314-
// part of the key instead of the value
315-
if (lastPos < end)
332+
// We didn't find the (entire) key/value separator
333+
if (lastPos < end) {
334+
// Treat the substring as part of the key instead of the value
316335
key += qs.slice(lastPos, end);
317-
} else if (lastPos < end)
318-
value += qs.slice(lastPos, end);
319-
if (keyEncoded)
320-
key = decodeStr(key, decode);
321-
if (valEncoded)
322-
value = decodeStr(value, decode);
323-
324-
if (key || value || lastPos - posIdx > sepLen || i === 0) {
325-
// Use a key array lookup instead of using hasOwnProperty(), which is
326-
// slower
327-
if (keys.indexOf(key) === -1) {
328-
obj[key] = value;
329-
keys[keys.length] = key;
336+
if (keyEncoded)
337+
key = decodeStr(key, decode);
330338
} else {
331-
const curValue = obj[key] || '';
332-
// A simple Array-specific property check is enough here to
333-
// distinguish from a string value and is faster and still safe
334-
// since we are generating all of the values being assigned.
335-
if (curValue.pop)
336-
curValue[curValue.length] = value;
337-
else if (curValue)
338-
obj[key] = [curValue, value];
339+
// We saw an empty substring between separators
340+
if (--pairs === 0)
341+
return obj;
342+
lastPos = i + 1;
343+
sepIdx = eqIdx = 0;
344+
continue;
345+
}
346+
} else {
347+
if (lastPos < end) {
348+
value += qs.slice(lastPos, end);
349+
if (valEncoded)
350+
value = decodeStr(value, decode);
339351
}
340-
} else if (i === 1) {
341-
// A pair with repeated sep could be added into obj in the first loop
342-
// and it should be deleted
343-
delete obj[key];
352+
if (keyEncoded)
353+
key = decodeStr(key, decode);
354+
}
355+
356+
// Use a key array lookup instead of using hasOwnProperty(), which is
357+
// slower
358+
if (keys.indexOf(key) === -1) {
359+
obj[key] = value;
360+
keys[keys.length] = key;
361+
} else {
362+
const curValue = obj[key];
363+
// A simple Array-specific property check is enough here to
364+
// distinguish from a string value and is faster and still safe
365+
// since we are generating all of the values being assigned.
366+
if (curValue.pop)
367+
curValue[curValue.length] = value;
368+
else
369+
obj[key] = [curValue, value];
344370
}
345371
if (--pairs === 0)
346-
break;
372+
return obj;
347373
keyEncoded = valEncoded = customDecode;
348-
encodeCheck = 0;
349374
key = value = '';
350-
posIdx = lastPos;
375+
encodeCheck = 0;
351376
lastPos = i + 1;
352377
sepIdx = eqIdx = 0;
353378
}
354-
continue;
355379
} else {
356380
sepIdx = 0;
357-
if (!valEncoded) {
358-
// Try to match an (valid) encoded byte (once) to minimize unnecessary
359-
// calls to string decoding functions
360-
if (code === 37/*%*/) {
361-
encodeCheck = 1;
362-
} else if (encodeCheck > 0 &&
363-
((code >= 48/*0*/ && code <= 57/*9*/) ||
364-
(code >= 65/*A*/ && code <= 70/*F*/) ||
365-
(code >= 97/*a*/ && code <= 102/*f*/))) {
366-
if (++encodeCheck === 3)
367-
valEncoded = true;
381+
// Try matching key/value separator (e.g. '=') if we haven't already
382+
if (eqIdx < eqLen) {
383+
if (code === eqCodes[eqIdx]) {
384+
if (++eqIdx === eqLen) {
385+
// Key/value separator match!
386+
const end = i - eqIdx + 1;
387+
if (lastPos < end)
388+
key += qs.slice(lastPos, end);
389+
encodeCheck = 0;
390+
lastPos = i + 1;
391+
}
392+
continue;
368393
} else {
369-
encodeCheck = 0;
394+
eqIdx = 0;
395+
if (!keyEncoded) {
396+
// Try to match an (valid) encoded byte once to minimize unnecessary
397+
// calls to string decoding functions
398+
if (code === 37/*%*/) {
399+
encodeCheck = 1;
400+
continue;
401+
} else if (encodeCheck > 0) {
402+
// eslint-disable-next-line no-extra-boolean-cast
403+
if (!!isHexTable[code]) {
404+
if (++encodeCheck === 3)
405+
keyEncoded = true;
406+
continue;
407+
} else {
408+
encodeCheck = 0;
409+
}
410+
}
411+
}
370412
}
371-
}
372-
}
373-
374-
// Try matching key/value separator (e.g. '=') if we haven't already
375-
if (eqIdx < eqLen) {
376-
if (code === eqCodes[eqIdx]) {
377-
if (++eqIdx === eqLen) {
378-
// Key/value separator match!
379-
const end = i - eqIdx + 1;
380-
if (lastPos < end)
381-
key += qs.slice(lastPos, end);
382-
encodeCheck = 0;
413+
if (code === 43/*+*/) {
414+
if (lastPos < i)
415+
key += qs.slice(lastPos, i);
416+
key += plusChar;
383417
lastPos = i + 1;
418+
continue;
384419
}
385-
continue;
386-
} else {
387-
eqIdx = 0;
388-
if (!keyEncoded) {
389-
// Try to match an (valid) encoded byte once to minimize unnecessary
390-
// calls to string decoding functions
391-
if (code === 37/*%*/) {
392-
encodeCheck = 1;
393-
} else if (encodeCheck > 0 &&
394-
((code >= 48/*0*/ && code <= 57/*9*/) ||
395-
(code >= 65/*A*/ && code <= 70/*F*/) ||
396-
(code >= 97/*a*/ && code <= 102/*f*/))) {
420+
}
421+
if (code === 43/*+*/) {
422+
if (lastPos < i)
423+
value += qs.slice(lastPos, i);
424+
value += plusChar;
425+
lastPos = i + 1;
426+
} else if (!valEncoded) {
427+
// Try to match an (valid) encoded byte (once) to minimize unnecessary
428+
// calls to string decoding functions
429+
if (code === 37/*%*/) {
430+
encodeCheck = 1;
431+
} else if (encodeCheck > 0) {
432+
// eslint-disable-next-line no-extra-boolean-cast
433+
if (!!isHexTable[code]) {
397434
if (++encodeCheck === 3)
398-
keyEncoded = true;
435+
valEncoded = true;
399436
} else {
400437
encodeCheck = 0;
401438
}
402439
}
403440
}
404441
}
405-
406-
if (code === 43/*+*/) {
407-
if (eqIdx < eqLen) {
408-
if (lastPos < i)
409-
key += qs.slice(lastPos, i);
410-
key += '%20';
411-
keyEncoded = true;
412-
} else {
413-
if (lastPos < i)
414-
value += qs.slice(lastPos, i);
415-
value += '%20';
416-
valEncoded = true;
417-
}
418-
lastPos = i + 1;
419-
}
420442
}
421443

422-
// Check if we have leftover key or value data
423-
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
424-
if (lastPos < qs.length) {
425-
if (eqIdx < eqLen)
426-
key += qs.slice(lastPos);
427-
else if (sepIdx < sepLen)
428-
value += qs.slice(lastPos);
429-
}
430-
if (keyEncoded)
431-
key = decodeStr(key, decode);
432-
if (valEncoded)
433-
value = decodeStr(value, decode);
434-
// Use a key array lookup instead of using hasOwnProperty(), which is
435-
// slower
436-
if (keys.indexOf(key) === -1) {
437-
obj[key] = value;
438-
keys[keys.length] = key;
439-
} else {
440-
const curValue = obj[key];
441-
// A simple Array-specific property check is enough here to
442-
// distinguish from a string value and is faster and still safe since
443-
// we are generating all of the values being assigned.
444-
if (curValue.pop)
445-
curValue[curValue.length] = value;
446-
else
447-
obj[key] = [curValue, value];
448-
}
444+
// Deal with any leftover key or value data
445+
if (lastPos < qs.length) {
446+
if (eqIdx < eqLen)
447+
key += qs.slice(lastPos);
448+
else if (sepIdx < sepLen)
449+
value += qs.slice(lastPos);
450+
} else if (eqIdx === 0) {
451+
// We ended on an empty substring
452+
return obj;
453+
}
454+
if (keyEncoded)
455+
key = decodeStr(key, decode);
456+
if (valEncoded)
457+
value = decodeStr(value, decode);
458+
// Use a key array lookup instead of using hasOwnProperty(), which is slower
459+
if (keys.indexOf(key) === -1) {
460+
obj[key] = value;
461+
keys[keys.length] = key;
462+
} else {
463+
const curValue = obj[key];
464+
// A simple Array-specific property check is enough here to
465+
// distinguish from a string value and is faster and still safe since
466+
// we are generating all of the values being assigned.
467+
if (curValue.pop)
468+
curValue[curValue.length] = value;
469+
else
470+
obj[key] = [curValue, value];
449471
}
450472

451473
return obj;

test/parallel/test-querystring.js

+9
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ const qsTestCases = [
5959
['&&&&', '', {}],
6060
['&=&', '=', { '': '' }],
6161
['&=&=', '=&=', { '': [ '', '' ]}],
62+
['a&&b', 'a=&b=', { 'a': '', 'b': '' }],
63+
['a=a&&b=b', 'a=a&b=b', { 'a': 'a', 'b': 'b' }],
64+
['&a', 'a=', { 'a': '' }],
65+
['&=', '=', { '': '' }],
66+
['a&a&', 'a=&a=', { a: [ '', '' ] }],
67+
['a&a&a&', 'a=&a=&a=', { a: [ '', '', '' ] }],
68+
['a&a&a&a&', 'a=&a=&a=&a=', { a: [ '', '', '', '' ] }],
69+
['a=&a=value&a=', 'a=&a=value&a=', { a: [ '', 'value', '' ] }],
70+
['foo+bar=baz+quux', 'foo%20bar=baz%20quux', { 'foo bar': 'baz quux' }],
6271
[null, '', {}],
6372
[undefined, '', {}]
6473
];

0 commit comments

Comments
 (0)