Skip to content

Commit 711dad2

Browse files
targosevanlucas
authored andcommitted
url: improve spec compliance of WHATWG URL
This patch contains the following changes: url: make IPv4 parser more spec compliant * Return int64_t from ParseNumber to prevent overflow for valid big numbers * Don't throw when there are more than 4 parts (it cannot be an IP address) * Correctly interpret the address and don't always throw when there are numbers > 255 Ref: https://url.spec.whatwg.org/#concept-ipv4-parser Fixes: #10306 url: percent encode fragment to follow spec change Ref: whatwg/url#150 Ref: whatwg/url@373dbed url: fix URL#search setter The check for empty string must be done before removing the leading '?'. Ref: https://url.spec.whatwg.org/#dom-url-search url: set port to null if an empty string is given This is to follow a spec change. Ref: whatwg/url#113 url: fix parsing of paths with Windows drive letter test: update WHATWG URL test fixtures PR-URL: #10317 Reviewed-By: James M Snell <[email protected]> Reviewed-By: Benjamin Gruenbaum <[email protected]>
1 parent 202c548 commit 711dad2

File tree

4 files changed

+429
-51
lines changed

4 files changed

+429
-51
lines changed

lib/internal/url.js

+2-3
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
444444
return;
445445
port = String(port);
446446
if (port === '') {
447-
// Currently, if port number is empty, left unchanged.
448-
// TODO(jasnell): This might be changing in the spec
447+
ctx.port = undefined;
449448
return;
450449
}
451450
binding.parse(port, binding.kPort, null, ctx,
@@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
478477
set(search) {
479478
const ctx = this[context];
480479
search = String(search);
481-
if (search[0] === '?') search = search.slice(1);
482480
if (!search) {
483481
ctx.query = null;
484482
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
485483
this[searchParams][searchParams] = {};
486484
return;
487485
}
486+
if (search[0] === '?') search = search.slice(1);
488487
ctx.query = '';
489488
binding.parse(search, binding.kQuery, null, ctx,
490489
onParseSearchComplete.bind(this));

src/node_url.cc

+37-27
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ namespace url {
261261
return type;
262262
}
263263

264-
static inline int ParseNumber(const char* start, const char* end) {
264+
static inline int64_t ParseNumber(const char* start, const char* end) {
265265
unsigned R = 10;
266266
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
267267
start += 2;
@@ -293,7 +293,7 @@ namespace url {
293293
}
294294
p++;
295295
}
296-
return strtol(start, NULL, R);
296+
return strtoll(start, NULL, R);
297297
}
298298

299299
static url_host_type ParseIPv4Host(url_host* host,
@@ -305,28 +305,25 @@ namespace url {
305305
const char* end = pointer + length;
306306
int parts = 0;
307307
uint32_t val = 0;
308-
unsigned numbers[4];
308+
uint64_t numbers[4];
309+
int tooBigNumbers = 0;
309310
if (length == 0)
310311
goto end;
311312

312313
while (pointer <= end) {
313314
const char ch = pointer < end ? pointer[0] : kEOL;
314315
const int remaining = end - pointer - 1;
315316
if (ch == '.' || ch == kEOL) {
316-
if (++parts > 4 || pointer - mark == 0)
317-
break;
318-
int n = ParseNumber(mark, pointer);
319-
if (n < 0) {
320-
type = HOST_TYPE_DOMAIN;
317+
if (++parts > 4)
321318
goto end;
322-
}
323-
if (pointer - mark == 10) {
324-
numbers[parts - 1] = n;
319+
if (pointer - mark == 0)
325320
break;
326-
}
327-
if (n > 255) {
328-
type = HOST_TYPE_FAILED;
321+
int64_t n = ParseNumber(mark, pointer);
322+
if (n < 0)
329323
goto end;
324+
325+
if (n > 255) {
326+
tooBigNumbers++;
330327
}
331328
numbers[parts - 1] = n;
332329
mark = pointer + 1;
@@ -335,14 +332,23 @@ namespace url {
335332
}
336333
pointer++;
337334
}
335+
CHECK_GT(parts, 0);
336+
337+
// If any but the last item in numbers is greater than 255, return failure.
338+
// If the last item in numbers is greater than or equal to
339+
// 256^(5 - the number of items in numbers), return failure.
340+
if (tooBigNumbers > 1 ||
341+
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
342+
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
343+
type = HOST_TYPE_FAILED;
344+
goto end;
345+
}
338346

339347
type = HOST_TYPE_IPV4;
340-
if (parts > 0) {
341-
val = numbers[parts - 1];
342-
for (int n = 0; n < parts - 1; n++) {
343-
double b = 3-n;
344-
val += numbers[n] * pow(256, b);
345-
}
348+
val = numbers[parts - 1];
349+
for (int n = 0; n < parts - 1; n++) {
350+
double b = 3 - n;
351+
val += numbers[n] * pow(256, b);
346352
}
347353

348354
host->value.ipv4 = val;
@@ -618,6 +624,13 @@ namespace url {
618624
}
619625
}
620626

627+
static inline void ShortenUrlPath(struct url_data* url) {
628+
if (url->path.empty()) return;
629+
if (url->path.size() == 1 && url->scheme == "file:" &&
630+
NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return;
631+
url->path.pop_back();
632+
}
633+
621634
static void Parse(Environment* env,
622635
Local<Value> recv,
623636
const char* input,
@@ -895,8 +908,7 @@ namespace url {
895908
if (DOES_HAVE_PATH(base)) {
896909
SET_HAVE_PATH()
897910
url.path = base.path;
898-
if (!url.path.empty())
899-
url.path.pop_back();
911+
ShortenUrlPath(&url);
900912
}
901913
url.port = base.port;
902914
state = kPath;
@@ -1112,8 +1124,7 @@ namespace url {
11121124
SET_HAVE_PATH()
11131125
url.path = base.path;
11141126
}
1115-
if (!url.path.empty())
1116-
url.path.pop_back();
1127+
ShortenUrlPath(&url);
11171128
}
11181129
state = kPath;
11191130
continue;
@@ -1172,8 +1183,7 @@ namespace url {
11721183
special_back_slash ||
11731184
(!state_override && (ch == '?' || ch == '#'))) {
11741185
if (IsDoubleDotSegment(buffer)) {
1175-
if (!url.path.empty())
1176-
url.path.pop_back();
1186+
ShortenUrlPath(&url);
11771187
if (ch != '/' && !special_back_slash) {
11781188
SET_HAVE_PATH()
11791189
url.path.push_back("");
@@ -1247,7 +1257,7 @@ namespace url {
12471257
case 0:
12481258
break;
12491259
default:
1250-
buffer += ch;
1260+
AppendOrEscape(&buffer, ch, SimpleEncodeSet);
12511261
}
12521262
break;
12531263
default:

test/fixtures/url-setter-tests.json

+20-14
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@
347347
}
348348
},
349349
{
350-
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
350+
"comment": "Port number is unchanged if not specified",
351351
"href": "http://example.net:8080",
352352
"new_value": "example.com:",
353353
"expected": {
@@ -358,7 +358,6 @@
358358
}
359359
},
360360
{
361-
362361
"comment": "The empty host is not valid for special schemes",
363362
"href": "http://example.net",
364363
"new_value": "",
@@ -763,14 +762,14 @@
763762
}
764763
},
765764
{
766-
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
765+
"comment": "Port number is removed if empty is the new value",
767766
"href": "http://example.net:8080",
768767
"new_value": "",
769768
"expected": {
770-
"href": "http://example.net:8080/",
771-
"host": "example.net:8080",
769+
"href": "http://example.net/",
770+
"host": "example.net",
772771
"hostname": "example.net",
773-
"port": "8080"
772+
"port": ""
774773
}
775774
},
776775
{
@@ -975,6 +974,15 @@
975974
"href": "http://example.net/..%c3%89t%C3%A9",
976975
"pathname": "/..%c3%89t%C3%A9"
977976
}
977+
},
978+
{
979+
"comment": "? needs to be encoded",
980+
"href": "http://example.net",
981+
"new_value": "?",
982+
"expected": {
983+
"href": "http://example.net/%3F",
984+
"pathname": "/%3F"
985+
}
978986
}
979987
],
980988
"search": [
@@ -1011,7 +1019,6 @@
10111019
}
10121020
},
10131021
{
1014-
"skip": "we do not pass this, but we do match chromes behavior",
10151022
"href": "https://example.net?lang=en-US#nav",
10161023
"new_value": "?",
10171024
"expected": {
@@ -1096,7 +1103,6 @@
10961103
}
10971104
},
10981105
{
1099-
"skip": "we do not pass this, but we do match chromes behavior",
11001106
"href": "https://example.net?lang=en-US#nav",
11011107
"new_value": "#",
11021108
"expected": {
@@ -1113,22 +1119,22 @@
11131119
}
11141120
},
11151121
{
1116-
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
1122+
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
11171123
"href": "a:/",
11181124
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
11191125
"expected": {
1120-
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
1121-
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
1126+
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
1127+
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
11221128
}
11231129
},
11241130
{
11251131
"comment": "Bytes already percent-encoded are left as-is",
11261132
"href": "http://example.net",
11271133
"new_value": "%c3%89té",
11281134
"expected": {
1129-
"href": "http://example.net/#%c3%89té",
1130-
"hash": "#%c3%89té"
1135+
"href": "http://example.net/#%c3%89t%C3%A9",
1136+
"hash": "#%c3%89t%C3%A9"
11311137
}
11321138
}
11331139
]
1134-
}
1140+
}

0 commit comments

Comments
 (0)