Skip to content

Commit a2a69a2

Browse files
committed
querystring: improve parse() performance
This commit improves parse() performance by ~20-200% with the various querystring-parse benchmarks.

Some optimization strategies used in this commit include:

* Combining multiple searches (for '&', '=', and '+') on the same string into a single loop
* Avoiding string.split()
* Minimizing creation of temporary strings
* Avoiding string decoding if no encoded bytes were found and the default string decoder is being used

PR-URL: #5012
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Roman Reiss <[email protected]>
Reviewed-By: Matteo Collina <[email protected]>
1 parent 90451a6 commit a2a69a2

File tree

3 files changed

+193
-64
lines changed

3 files changed

+193
-64
lines changed

benchmark/querystring/querystring-parse.js

+32-10
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@ var querystring = require('querystring');
33
var v8 = require('v8');
44

55
var bench = common.createBenchmark(main, {
6-
type: ['noencode', 'encodemany', 'encodelast', 'multivalue'],
6+
type: ['noencode',
7+
'multicharsep',
8+
'encodemany',
9+
'encodelast',
10+
'multivalue',
11+
'multivaluemany',
12+
'manypairs'],
713
n: [1e6],
814
});
915

@@ -13,22 +19,38 @@ function main(conf) {
1319

1420
var inputs = {
1521
noencode: 'foo=bar&baz=quux&xyzzy=thud',
22+
multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud',
1623
encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d',
1724
encodelast: 'foo=bar&baz=quux&xyzzy=thu%64',
18-
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz'
25+
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
26+
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
27+
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
28+
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
1929
};
2030
var input = inputs[type];
2131

2232
// Force-optimize querystring.parse() so that the benchmark doesn't get
2333
// disrupted by the optimizer kicking in halfway through.
24-
for (var name in inputs)
25-
querystring.parse(inputs[name]);
26-
2734
v8.setFlagsFromString('--allow_natives_syntax');
28-
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
29-
30-
bench.start();
31-
for (var i = 0; i < n; i += 1)
35+
if (type !== 'multicharsep') {
3236
querystring.parse(input);
33-
bench.end(n);
37+
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
38+
querystring.parse(input);
39+
} else {
40+
querystring.parse(input, '&&&&&&&&&&');
41+
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
42+
querystring.parse(input, '&&&&&&&&&&');
43+
}
44+
45+
if (type !== 'multicharsep') {
46+
bench.start();
47+
for (var i = 0; i < n; i += 1)
48+
querystring.parse(input);
49+
bench.end(n);
50+
} else {
51+
bench.start();
52+
for (var i = 0; i < n; i += 1)
53+
querystring.parse(input, '&&&&&&&&&&');
54+
bench.end(n);
55+
}
3456
}

lib/querystring.js

+158-54
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,14 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) {
7878
};
7979

8080

81-
QueryString.unescape = function(s, decodeSpaces) {
81+
function qsUnescape(s, decodeSpaces) {
8282
try {
8383
return decodeURIComponent(s);
8484
} catch (e) {
8585
return QueryString.unescapeBuffer(s, decodeSpaces).toString();
8686
}
87-
};
87+
}
88+
QueryString.unescape = qsUnescape;
8889

8990

9091
var hexTable = new Array(256);
@@ -198,87 +199,190 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) {
198199
return '';
199200
};
200201

201-
// Parse a key=val string.
202+
// Parse a key/val string.
202203
QueryString.parse = QueryString.decode = function(qs, sep, eq, options) {
203204
sep = sep || '&';
204205
eq = eq || '=';
205-
const eqLen = eq.length;
206-
var obj = {};
206+
207+
const obj = {};
207208

208209
if (typeof qs !== 'string' || qs.length === 0) {
209210
return obj;
210211
}
211212

213+
if (typeof sep !== 'string')
214+
sep += '';
215+
216+
const eqLen = eq.length;
217+
const sepLen = sep.length;
218+
212219
var maxKeys = 1000;
213220
if (options && typeof options.maxKeys === 'number') {
214221
maxKeys = options.maxKeys;
215222
}
216223

217-
// maxKeys <= 0 means that we should not limit keys count
218-
if (maxKeys > 0 && isFinite(maxKeys)) {
219-
qs = qs.split(sep, maxKeys);
220-
} else {
221-
qs = qs.split(sep);
222-
}
223-
224-
var len = qs.length;
224+
var pairs = Infinity;
225+
if (maxKeys > 0)
226+
pairs = maxKeys;
225227

226228
var decode = QueryString.unescape;
227229
if (options && typeof options.decodeURIComponent === 'function') {
228230
decode = options.decodeURIComponent;
229231
}
230-
231-
var keys = [];
232-
for (var i = 0; i < len; ++i) {
233-
// replacePlus() is used instead of a regexp because it is ~15-30% faster
234-
// with v8 4.7
235-
const x = replacePlus(qs[i]);
236-
const idx = x.indexOf(eq);
237-
var k, v;
238-
239-
if (idx >= 0) {
240-
k = decodeStr(x.substring(0, idx), decode);
241-
v = decodeStr(x.substring(idx + eqLen), decode);
232+
const customDecode = (decode !== qsUnescape);
233+
234+
const keys = [];
235+
var lastPos = 0;
236+
var sepIdx = 0;
237+
var eqIdx = 0;
238+
var key = '';
239+
var value = '';
240+
var keyEncoded = customDecode;
241+
var valEncoded = customDecode;
242+
var encodeCheck = 0;
243+
for (var i = 0; i < qs.length; ++i) {
244+
const code = qs.charCodeAt(i);
245+
246+
// Try matching key/value pair separator (e.g. '&')
247+
if (code === sep.charCodeAt(sepIdx)) {
248+
if (++sepIdx === sepLen) {
249+
// Key/value pair separator match!
250+
const end = i - sepIdx + 1;
251+
if (eqIdx < eqLen) {
252+
// If we didn't find the key/value separator, treat the substring as
253+
// part of the key instead of the value
254+
if (lastPos < end)
255+
key += qs.slice(lastPos, end);
256+
} else if (lastPos < end)
257+
value += qs.slice(lastPos, end);
258+
if (keyEncoded)
259+
key = decodeStr(key, decode);
260+
if (valEncoded)
261+
value = decodeStr(value, decode);
262+
// Use a key array lookup instead of using hasOwnProperty(), which is
263+
// slower
264+
if (keys.indexOf(key) === -1) {
265+
obj[key] = value;
266+
keys[keys.length] = key;
267+
} else {
268+
const curValue = obj[key];
269+
// `instanceof Array` is used instead of Array.isArray() because it
270+
// is ~15-20% faster with v8 4.7 and is safe to use because we are
271+
// using it with values being created within this function
272+
if (curValue instanceof Array)
273+
curValue[curValue.length] = value;
274+
else
275+
obj[key] = [curValue, value];
276+
}
277+
if (--pairs === 0)
278+
break;
279+
keyEncoded = valEncoded = customDecode;
280+
encodeCheck = 0;
281+
key = value = '';
282+
lastPos = i + 1;
283+
sepIdx = eqIdx = 0;
284+
}
285+
continue;
242286
} else {
243-
k = decodeStr(x, decode);
244-
v = '';
287+
sepIdx = 0;
288+
if (!valEncoded) {
289+
// Try to match a (valid) encoded byte (once) to minimize unnecessary
290+
// calls to string decoding functions
291+
if (code === 37/*%*/) {
292+
encodeCheck = 1;
293+
} else if (encodeCheck > 0 &&
294+
((code >= 48/*0*/ && code <= 57/*9*/) ||
295+
(code >= 65/*A*/ && code <= 70/*F*/) ||
296+
(code >= 97/*a*/ && code <= 102/*f*/))) {
297+
if (++encodeCheck === 3)
298+
valEncoded = true;
299+
} else {
300+
encodeCheck = 0;
301+
}
302+
}
245303
}
246304

247-
// Use a key array lookup instead of using hasOwnProperty(), which is slower
248-
if (keys.indexOf(k) === -1) {
249-
obj[k] = v;
250-
keys.push(k);
251-
} else if (obj[k] instanceof Array) {
252-
// `instanceof Array` is used instead of Array.isArray() because it is
253-
// ~15-20% faster with v8 4.7 and is safe to use because we are using it
254-
// with values being created within this function
255-
obj[k].push(v);
305+
// Try matching key/value separator (e.g. '=') if we haven't already
306+
if (eqIdx < eqLen) {
307+
if (code === eq.charCodeAt(eqIdx)) {
308+
if (++eqIdx === eqLen) {
309+
// Key/value separator match!
310+
const end = i - eqIdx + 1;
311+
if (lastPos < end)
312+
key += qs.slice(lastPos, end);
313+
encodeCheck = 0;
314+
lastPos = i + 1;
315+
}
316+
continue;
317+
} else {
318+
eqIdx = 0;
319+
if (!keyEncoded) {
320+
// Try to match a (valid) encoded byte once to minimize unnecessary
321+
// calls to string decoding functions
322+
if (code === 37/*%*/) {
323+
encodeCheck = 1;
324+
} else if (encodeCheck > 0 &&
325+
((code >= 48/*0*/ && code <= 57/*9*/) ||
326+
(code >= 65/*A*/ && code <= 70/*F*/) ||
327+
(code >= 97/*a*/ && code <= 102/*f*/))) {
328+
if (++encodeCheck === 3)
329+
keyEncoded = true;
330+
} else {
331+
encodeCheck = 0;
332+
}
333+
}
334+
}
335+
}
336+
337+
if (code === 43/*+*/) {
338+
if (eqIdx < eqLen) {
339+
if (i - lastPos > 0)
340+
key += qs.slice(lastPos, i);
341+
key += '%20';
342+
keyEncoded = true;
343+
} else {
344+
if (i - lastPos > 0)
345+
value += qs.slice(lastPos, i);
346+
value += '%20';
347+
valEncoded = true;
348+
}
349+
lastPos = i + 1;
350+
}
351+
}
352+
353+
// Check if we have leftover key or value data
354+
if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) {
355+
if (lastPos < qs.length) {
356+
if (eqIdx < eqLen)
357+
key += qs.slice(lastPos);
358+
else if (sepIdx < sepLen)
359+
value += qs.slice(lastPos);
360+
}
361+
if (keyEncoded)
362+
key = decodeStr(key, decode);
363+
if (valEncoded)
364+
value = decodeStr(value, decode);
365+
// Use a key array lookup instead of using hasOwnProperty(), which is
366+
// slower
367+
if (keys.indexOf(key) === -1) {
368+
obj[key] = value;
369+
keys[keys.length] = key;
256370
} else {
257-
obj[k] = [obj[k], v];
371+
const curValue = obj[key];
372+
// `instanceof Array` is used instead of Array.isArray() because it
373+
// is ~15-20% faster with v8 4.7 and is safe to use because we are
374+
// using it with values being created within this function
375+
if (curValue instanceof Array)
376+
curValue[curValue.length] = value;
377+
else
378+
obj[key] = [curValue, value];
258379
}
259380
}
260381

261382
return obj;
262383
};
263384

264385

265-
function replacePlus(str) {
266-
var ret = '';
267-
var start = 0;
268-
var i = -1;
269-
while ((i = str.indexOf('+', i + 1)) !== -1) {
270-
ret += str.slice(start, i);
271-
ret += '%20';
272-
start = i + 1;
273-
}
274-
if (start === 0)
275-
return str;
276-
if (start < str.length)
277-
ret += str.slice(start);
278-
return ret;
279-
}
280-
281-
282386
// v8 does not optimize functions with try-catch blocks, so we isolate them here
283387
// to minimize the damage
284388
function decodeStr(s, decoder) {

test/parallel/test-querystring.js

+3
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,6 @@ qs.unescape = function(str) {
248248
};
249249
assert.deepEqual(qs.parse('foo=bor'), {f__: 'b_r'});
250250
qs.unescape = prevUnescape;
251+
252+
// test separator and "equals" parsing order
253+
assert.deepEqual(qs.parse('foo&bar', '&', '&'), { foo: '', bar: '' });

0 commit comments

Comments
 (0)