-
Notifications
You must be signed in to change notification settings - Fork 31.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
http: optimize checkIsHttpToken() and checkInvalidHeaderChar() #6570
Changes from all commits
918159e
83432bf
39fdf07
c8fa79f
c570182
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
'use strict'; | ||
|
||
const common = require('../common.js'); | ||
const _checkInvalidHeaderChar = require('_http_common')._checkInvalidHeaderChar; | ||
|
||
const bench = common.createBenchmark(main, { | ||
key: [ | ||
// Valid | ||
'', | ||
'1', | ||
'\t\t\t\t\t\t\t\t\t\tFoo bar baz', | ||
'keep-alive', | ||
'close', | ||
'gzip', | ||
'20091', | ||
'private', | ||
'text/html; charset=utf-8', | ||
'text/plain', | ||
'Sat, 07 May 2016 16:54:48 GMT', | ||
'SAMEORIGIN', | ||
'en-US', | ||
|
||
// Invalid | ||
'Here is a value that is really a folded header value\r\n this should be \ | ||
supported, but it is not currently', | ||
'中文呢', // unicode | ||
'foo\nbar', | ||
'\x7F' | ||
], | ||
n: [5e8], | ||
}); | ||
|
||
function main(conf) { | ||
var n = +conf.n; | ||
var key = conf.key; | ||
|
||
bench.start(); | ||
for (var i = 0; i < n; i++) { | ||
_checkInvalidHeaderChar(key); | ||
} | ||
bench.end(n); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -232,52 +232,65 @@ exports.httpSocketSetup = httpSocketSetup; | |
* per the rules defined in RFC 7230 | ||
* See https://tools.ietf.org/html/rfc7230#section-3.2.6 | ||
* | ||
* Allowed characters in an HTTP token: | ||
* ^_`a-z 94-122 | ||
* A-Z 65-90 | ||
* - 45 | ||
* 0-9 48-57 | ||
* ! 33 | ||
* #$%&' 35-39 | ||
* *+ 42-43 | ||
* . 46 | ||
* | 124 | ||
* ~ 126 | ||
* | ||
* This implementation of checkIsHttpToken() loops over the string instead of | ||
* using a regular expression since the former is up to 180% faster with v8 4.9 | ||
* depending on the string length (the shorter the string, the larger the | ||
* performance difference) | ||
* | ||
* Additionally, checkIsHttpToken() is currently designed to be inlinable by v8, | ||
* so take care when making changes to the implementation so that the source | ||
* code size does not exceed v8's default max_inlined_source_size setting. | ||
**/ | ||
function isValidTokenChar(ch) { | ||
if (ch >= 94 && ch <= 122) | ||
return true; | ||
if (ch >= 65 && ch <= 90) | ||
return true; | ||
if (ch === 45) | ||
return true; | ||
if (ch >= 48 && ch <= 57) | ||
return true; | ||
if (ch === 34 || ch === 40 || ch === 41 || ch === 44) | ||
return false; | ||
if (ch >= 33 && ch <= 46) | ||
return true; | ||
if (ch === 124 || ch === 126) | ||
return true; | ||
return false; | ||
} | ||
function checkIsHttpToken(val) { | ||
if (typeof val !== 'string' || val.length === 0) | ||
return false; | ||
|
||
for (var i = 0, len = val.length; i < len; i++) { | ||
var ch = val.charCodeAt(i); | ||
|
||
if (ch >= 65 && ch <= 90) // A-Z | ||
continue; | ||
|
||
if (ch >= 97 && ch <= 122) // a-z | ||
continue; | ||
|
||
// ^ => 94 | ||
// _ => 95 | ||
// ` => 96 | ||
// | => 124 | ||
// ~ => 126 | ||
if (ch === 94 || ch === 95 || ch === 96 || ch === 124 || ch === 126) | ||
continue; | ||
|
||
if (ch >= 48 && ch <= 57) // 0-9 | ||
continue; | ||
|
||
// ! => 33 | ||
// # => 35 | ||
// $ => 36 | ||
// % => 37 | ||
// & => 38 | ||
// ' => 39 | ||
// * => 42 | ||
// + => 43 | ||
// - => 45 | ||
// . => 46 | ||
if (ch >= 33 && ch <= 46) { | ||
if (ch === 34 || ch === 40 || ch === 41 || ch === 44) | ||
if (!isValidTokenChar(val.charCodeAt(0))) | ||
return false; | ||
const len = val.length; | ||
if (len > 1) { | ||
if (!isValidTokenChar(val.charCodeAt(1))) | ||
return false; | ||
if (len > 2) { | ||
if (!isValidTokenChar(val.charCodeAt(2))) | ||
return false; | ||
continue; | ||
if (len > 3) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Isn't this the common case? Can you quantify how much the manual loop unrolling helps? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't save the old results, but there was a lesser performance improvement with the pure loop implementation. I can run it again without and post the results here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ok here are the results with just the loop, no unrolling:
|
||
if (!isValidTokenChar(val.charCodeAt(3))) | ||
return false; | ||
for (var i = 4; i < len; i++) { | ||
if (!isValidTokenChar(val.charCodeAt(i))) | ||
return false; | ||
} | ||
} | ||
} | ||
|
||
return false; | ||
} | ||
return true; | ||
} | ||
|
@@ -288,13 +301,32 @@ exports._checkIsHttpToken = checkIsHttpToken; | |
* field-value = *( field-content / obs-fold ) | ||
* field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] | ||
* field-vchar = VCHAR / obs-text | ||
* | ||
* checkInvalidHeaderChar() is currently designed to be inlinable by v8, | ||
* so take care when making changes to the implementation so that the source | ||
* code size does not exceed v8's default max_inlined_source_size setting. | ||
**/ | ||
function checkInvalidHeaderChar(val) { | ||
val = '' + val; | ||
for (var i = 0; i < val.length; i++) { | ||
const ch = val.charCodeAt(i); | ||
if (ch === 9) continue; | ||
if (ch <= 31 || ch > 255 || ch === 127) return true; | ||
val += ''; | ||
if (val.length < 1) | ||
return false; | ||
var c = val.charCodeAt(0); | ||
if ((c <= 31 && c !== 9) || c > 255 || c === 127) | ||
return true; | ||
if (val.length < 2) | ||
return false; | ||
c = val.charCodeAt(1); | ||
if ((c <= 31 && c !== 9) || c > 255 || c === 127) | ||
return true; | ||
if (val.length < 3) | ||
return false; | ||
c = val.charCodeAt(2); | ||
if ((c <= 31 && c !== 9) || c > 255 || c === 127) | ||
return true; | ||
for (var i = 3; i < val.length; ++i) { | ||
c = val.charCodeAt(i); | ||
if ((c <= 31 && c !== 9) || c > 255 || c === 127) | ||
return true; | ||
} | ||
return false; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
'use strict'; | ||
require('../common'); | ||
const assert = require('assert'); | ||
const inspect = require('util').inspect; | ||
const checkIsHttpToken = require('_http_common')._checkIsHttpToken; | ||
const checkInvalidHeaderChar = require('_http_common')._checkInvalidHeaderChar; | ||
|
||
// Good header field names | ||
[ | ||
'TCN', | ||
'ETag', | ||
'date', | ||
'alt-svc', | ||
'Content-Type', | ||
'0', | ||
'Set-Cookie2', | ||
'Set_Cookie', | ||
'foo`bar^', | ||
'foo|bar', | ||
'~foobar', | ||
'FooBar!', | ||
'#Foo', | ||
'$et-Cookie', | ||
'%%Test%%', | ||
'Test&123', | ||
'It\'s_fun', | ||
'2*3', | ||
'4+2', | ||
'3.14159265359' | ||
].forEach(function(str) { | ||
assert.strictEqual(checkIsHttpToken(str), | ||
true, | ||
'checkIsHttpToken(' + | ||
inspect(str) + | ||
') unexpectedly failed'); | ||
}); | ||
// Bad header field names | ||
[ | ||
':', | ||
'@@', | ||
'中文呢', // unicode | ||
'((((())))', | ||
':alternate-protocol', | ||
'alternate-protocol:', | ||
'foo\nbar', | ||
'foo\rbar', | ||
'foo\r\nbar', | ||
'foo\x00bar', | ||
'\x7FMe!', | ||
'{Start', | ||
'(Start', | ||
'[Start', | ||
'End}', | ||
'End)', | ||
'End]', | ||
'"Quote"', | ||
'This,That' | ||
].forEach(function(str) { | ||
assert.strictEqual(checkIsHttpToken(str), | ||
false, | ||
'checkIsHttpToken(' + | ||
inspect(str) + | ||
') unexpectedly succeeded'); | ||
}); | ||
|
||
|
||
// Good header field values | ||
[ | ||
'foo bar', | ||
'foo\tbar', | ||
'0123456789ABCdef', | ||
'!@#$%^&*()-_=+\\;\':"[]{}<>,./?|~`' | ||
].forEach(function(str) { | ||
assert.strictEqual(checkInvalidHeaderChar(str), | ||
false, | ||
'checkInvalidHeaderChar(' + | ||
inspect(str) + | ||
') unexpectedly failed'); | ||
}); | ||
|
||
// Bad header field values | ||
[ | ||
'foo\rbar', | ||
'foo\nbar', | ||
'foo\r\nbar', | ||
'中文呢', // unicode | ||
'\x7FMe!', | ||
'Testing 123\x00', | ||
'foo\vbar', | ||
'Ding!\x07' | ||
].forEach(function(str) { | ||
assert.strictEqual(checkInvalidHeaderChar(str), | ||
true, | ||
'checkInvalidHeaderChar(' + | ||
inspect(str) + | ||
') unexpectedly succeeded'); | ||
}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Either the changes to this file should be a separate commit or the commit log should explain why they are necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Initially I had planned it to be in a separate commit but there would then technically be a gap where the new benchmark would be broken (causing the benchmark runner to get in a large loop actually), especially if the benchmark would be cherry picked. I can amend the commit message...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, commit message updated.