Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

url: make WHATWG URL implementation more spec compliant #10317

Merged
merged 1 commit into from
Jan 1, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions lib/internal/url.js
Original file line number Diff line number Diff line change
@@ -444,8 +444,7 @@ Object.defineProperties(URL.prototype, {
return;
port = String(port);
if (port === '') {
// Currently, if port number is empty, left unchanged.
// TODO(jasnell): This might be changing in the spec
ctx.port = undefined;
return;
}
binding.parse(port, binding.kPort, null, ctx,
@@ -478,13 +477,13 @@ Object.defineProperties(URL.prototype, {
set(search) {
const ctx = this[context];
search = String(search);
if (search[0] === '?') search = search.slice(1);
if (!search) {
ctx.query = null;
ctx.flags &= ~binding.URL_FLAGS_HAS_QUERY;
this[searchParams][searchParams] = {};
return;
}
if (search[0] === '?') search = search.slice(1);
ctx.query = '';
binding.parse(search, binding.kQuery, null, ctx,
onParseSearchComplete.bind(this));
64 changes: 37 additions & 27 deletions src/node_url.cc
Original file line number Diff line number Diff line change
@@ -261,7 +261,7 @@ namespace url {
return type;
}

static inline int ParseNumber(const char* start, const char* end) {
static inline int64_t ParseNumber(const char* start, const char* end) {
unsigned R = 10;
if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
start += 2;
@@ -293,7 +293,7 @@ namespace url {
}
p++;
}
return strtol(start, NULL, R);
return strtoll(start, NULL, R);
}

static url_host_type ParseIPv4Host(url_host* host,
@@ -305,28 +305,25 @@ namespace url {
const char* end = pointer + length;
int parts = 0;
uint32_t val = 0;
unsigned numbers[4];
uint64_t numbers[4];
int tooBigNumbers = 0;
if (length == 0)
goto end;

while (pointer <= end) {
const char ch = pointer < end ? pointer[0] : kEOL;
const int remaining = end - pointer - 1;
if (ch == '.' || ch == kEOL) {
if (++parts > 4 || pointer - mark == 0)
break;
int n = ParseNumber(mark, pointer);
if (n < 0) {
type = HOST_TYPE_DOMAIN;
if (++parts > 4)
goto end;
}
if (pointer - mark == 10) {
numbers[parts - 1] = n;
if (pointer - mark == 0)
break;
}
if (n > 255) {
type = HOST_TYPE_FAILED;
int64_t n = ParseNumber(mark, pointer);
if (n < 0)
goto end;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasnell Just wondering if you remember what this condition was for? Maybe it was a mistake to remove it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not off the top of my head... I'll be able to take a look in the next day or two. (sorry, company in town for the holiday is taking up quite a bit of my time this week)


if (n > 255) {
tooBigNumbers++;
}
numbers[parts - 1] = n;
mark = pointer + 1;
@@ -335,14 +332,23 @@ namespace url {
}
pointer++;
}
CHECK_GT(parts, 0);

// If any but the last item in numbers is greater than 255, return failure.
// If the last item in numbers is greater than or equal to
// 256^(5 - the number of items in numbers), return failure.
if (tooBigNumbers > 1 ||
(tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
type = HOST_TYPE_FAILED;
goto end;
}

type = HOST_TYPE_IPV4;
if (parts > 0) {
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3-n;
val += numbers[n] * pow(256, b);
}
val = numbers[parts - 1];
for (int n = 0; n < parts - 1; n++) {
double b = 3 - n;
val += numbers[n] * pow(256, b);
}

host->value.ipv4 = val;
@@ -618,6 +624,13 @@ namespace url {
}
}

static inline void ShortenUrlPath(struct url_data* url) {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it better to use a const reference?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

possibly, no strong opinion

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it cannot be a const because the url is modified in the function. Non-const references are not allowed by the linter, so I'll keep the pointer.

if (url->path.empty()) return;
if (url->path.size() == 1 && url->scheme == "file:" &&
NORMALIZED_WINDOWS_DRIVE_LETTER(url->path[0])) return;
url->path.pop_back();
}

static void Parse(Environment* env,
Local<Value> recv,
const char* input,
@@ -895,8 +908,7 @@ namespace url {
if (DOES_HAVE_PATH(base)) {
SET_HAVE_PATH()
url.path = base.path;
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
url.port = base.port;
state = kPath;
@@ -1112,8 +1124,7 @@ namespace url {
SET_HAVE_PATH()
url.path = base.path;
}
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
}
state = kPath;
continue;
@@ -1172,8 +1183,7 @@ namespace url {
special_back_slash ||
(!state_override && (ch == '?' || ch == '#'))) {
if (IsDoubleDotSegment(buffer)) {
if (!url.path.empty())
url.path.pop_back();
ShortenUrlPath(&url);
if (ch != '/' && !special_back_slash) {
SET_HAVE_PATH()
url.path.push_back("");
@@ -1247,7 +1257,7 @@ namespace url {
case 0:
break;
default:
buffer += ch;
AppendOrEscape(&buffer, ch, SimpleEncodeSet);
}
break;
default:
34 changes: 20 additions & 14 deletions test/fixtures/url-setter-tests.json
Original file line number Diff line number Diff line change
@@ -347,7 +347,7 @@
}
},
{
"comment": "Port number is unchanges if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is unchanged if not specified",
"href": "http://example.net:8080",
"new_value": "example.com:",
"expected": {
@@ -358,7 +358,6 @@
}
},
{

"comment": "The empty host is not valid for special schemes",
"href": "http://example.net",
"new_value": "",
@@ -763,14 +762,14 @@
}
},
{
"comment": "Port number is unchanged if empty in the new value. Note: this may change, see https://github.com/whatwg/url/pull/113",
"comment": "Port number is removed if empty is the new value",
"href": "http://example.net:8080",
"new_value": "",
"expected": {
"href": "http://example.net:8080/",
"host": "example.net:8080",
"href": "http://example.net/",
"host": "example.net",
"hostname": "example.net",
"port": "8080"
"port": ""
}
},
{
@@ -975,6 +974,15 @@
"href": "http://example.net/..%c3%89t%C3%A9",
"pathname": "/..%c3%89t%C3%A9"
}
},
{
"comment": "? needs to be encoded",
"href": "http://example.net",
"new_value": "?",
"expected": {
"href": "http://example.net/%3F",
"pathname": "/%3F"
}
}
],
"search": [
@@ -1011,7 +1019,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "?",
"expected": {
@@ -1096,7 +1103,6 @@
}
},
{
"skip": "we do not pass this, but we do match chromes behavior",
"href": "https://example.net?lang=en-US#nav",
"new_value": "#",
"expected": {
@@ -1113,22 +1119,22 @@
}
},
{
"comment": "No percent-encoding at all (!); nuls, tabs, and newlines are removed",
"comment": "Simple percent-encoding; nuls, tabs, and newlines are removed",
"href": "a:/",
"new_value": "\u0000\u0001\t\n\r\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"expected": {
"href": "a:/#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé",
"hash": "#\u0001\u001f !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~\u007f\u0080\u0081Éé"
"href": "a:/#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9",
"hash": "#%01%1F !\"#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~%7F%C2%80%C2%81%C3%89%C3%A9"
}
},
{
"comment": "Bytes already percent-encoded are left as-is",
"href": "http://example.net",
"new_value": "%c3%89té",
"expected": {
"href": "http://example.net/#%c3%89té",
"hash": "#%c3%89té"
"href": "http://example.net/#%c3%89t%C3%A9",
"hash": "#%c3%89t%C3%A9"
}
}
]
}
}
377 changes: 370 additions & 7 deletions test/fixtures/url-tests.json
Original file line number Diff line number Diff line change
@@ -842,6 +842,36 @@
"search": "",
"hash": ""
},
{
"input": "http://[::127.0.0.1]",
"base": "http://example.org/foo/bar",
"href": "http://[::7f00:1]/",
"origin": "http://[::7f00:1]",
"protocol": "http:",
"username": "",
"password": "",
"host": "[::7f00:1]",
"hostname": "[::7f00:1]",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://[0:0:0:0:0:0:13.1.68.3]",
"base": "http://example.org/foo/bar",
"href": "http://[::d01:4403]/",
"origin": "http://[::d01:4403]",
"protocol": "http:",
"username": "",
"password": "",
"host": "[::d01:4403]",
"hostname": "[::d01:4403]",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://[2001::1]:80",
"base": "http://example.org/foo/bar",
@@ -1264,7 +1294,7 @@
{
"input": "",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar#β",
"href": "http://example.org/foo/bar#%CE%B2",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
@@ -1274,7 +1304,7 @@
"port": "",
"pathname": "/foo/bar",
"search": "",
"hash": "#β"
"hash": "#%CE%B2"
},
{
"input": "data:text/html,test#test",
@@ -1291,6 +1321,21 @@
"search": "",
"hash": "#test"
},
{
"input": "tel:1234567890",
"base": "http://example.org/foo/bar",
"href": "tel:1234567890",
"origin": "null",
"protocol": "tel:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "1234567890",
"search": "",
"hash": ""
},
"# Based on http://trac.webkit.org/browser/trunk/LayoutTests/fast/url/file.html",
{
"input": "file:c:\\foo\\bar.html",
@@ -2118,7 +2163,7 @@
{
"input": "http://www.google.com/foo?bar=baz# »",
"base": "about:blank",
"href": "http://www.google.com/foo?bar=baz# »",
"href": "http://www.google.com/foo?bar=baz# %C2%BB",
"origin": "http://www.google.com",
"protocol": "http:",
"username": "",
@@ -2128,12 +2173,12 @@
"port": "",
"pathname": "/foo",
"search": "?bar=baz",
"hash": "# »"
"hash": "# %C2%BB"
},
{
"input": "data:test# »",
"base": "about:blank",
"href": "data:test# »",
"href": "data:test# %C2%BB",
"origin": "null",
"protocol": "data:",
"username": "",
@@ -2143,7 +2188,7 @@
"port": "",
"pathname": "test",
"search": "",
"hash": "# »"
"hash": "# %C2%BB"
},
{
"input": "http://[www.google.com]/",
@@ -4165,6 +4210,22 @@
"search": "",
"hash": ""
},
"# unknown scheme with path looking like a password",
{
"input": "sc::a@example.net",
"base": "about:blank",
"href": "sc::a@example.net",
"origin": "null",
"protocol": "sc:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": ":a@example.net",
"search": "",
"hash": ""
},
"# tests from jsdom/whatwg-url designed for code coverage",
{
"input": "http://127.0.0.1:10100/relative_import.html",
@@ -4226,5 +4287,307 @@
"pathname": "/path",
"search": "?query",
"hash": "#frag"
},
"# Stringification of URL.searchParams",
{
"input": "?a=b&c=d",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar?a=b&c=d",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "?a=b&c=d",
"searchParams": "a=b&c=d",
"hash": ""
},
{
"input": "??a=b&c=d",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar??a=b&c=d",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "??a=b&c=d",
"searchParams": "%3Fa=b&c=d",
"hash": ""
},
"# Scheme only",
{
"input": "http:",
"base": "http://example.org/foo/bar",
"href": "http://example.org/foo/bar",
"origin": "http://example.org",
"protocol": "http:",
"username": "",
"password": "",
"host": "example.org",
"hostname": "example.org",
"port": "",
"pathname": "/foo/bar",
"search": "",
"searchParams": "",
"hash": ""
},
{
"input": "http:",
"base": "https://example.org/foo/bar",
"failure": true
},
{
"input": "sc:",
"base": "https://example.org/foo/bar",
"href": "sc:",
"origin": "null",
"protocol": "sc:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "",
"search": "",
"searchParams": "",
"hash": ""
},
"# Percent encoding of fragments",
{
"input": "http://foo.bar/baz?qux#foo\bbar",
"base": "about:blank",
"href": "http://foo.bar/baz?qux#foo%08bar",
"origin": "http://foo.bar",
"protocol": "http:",
"username": "",
"password": "",
"host": "foo.bar",
"hostname": "foo.bar",
"port": "",
"pathname": "/baz",
"search": "?qux",
"searchParams": "",
"hash": "#foo%08bar"
},
"# IPv4 parsing (via https://github.com/nodejs/node/pull/10317)",
{
"input": "http://192.168.257",
"base": "http://other.com/",
"href": "http://192.168.1.1/",
"origin": "http://192.168.1.1",
"protocol": "http:",
"username": "",
"password": "",
"host": "192.168.1.1",
"hostname": "192.168.1.1",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://192.168.257.com",
"base": "http://other.com/",
"href": "http://192.168.257.com/",
"origin": "http://192.168.257.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "192.168.257.com",
"hostname": "192.168.257.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://256",
"base": "http://other.com/",
"href": "http://0.0.1.0/",
"origin": "http://0.0.1.0",
"protocol": "http:",
"username": "",
"password": "",
"host": "0.0.1.0",
"hostname": "0.0.1.0",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://256.com",
"base": "http://other.com/",
"href": "http://256.com/",
"origin": "http://256.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "256.com",
"hostname": "256.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://999999999",
"base": "http://other.com/",
"href": "http://59.154.201.255/",
"origin": "http://59.154.201.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "59.154.201.255",
"hostname": "59.154.201.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://999999999.com",
"base": "http://other.com/",
"href": "http://999999999.com/",
"origin": "http://999999999.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "999999999.com",
"hostname": "999999999.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://10000000000",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://10000000000.com",
"base": "http://other.com/",
"href": "http://10000000000.com/",
"origin": "http://10000000000.com",
"protocol": "http:",
"username": "",
"password": "",
"host": "10000000000.com",
"hostname": "10000000000.com",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://4294967295",
"base": "http://other.com/",
"href": "http://255.255.255.255/",
"origin": "http://255.255.255.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "255.255.255.255",
"hostname": "255.255.255.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://4294967296",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://0xffffffff",
"base": "http://other.com/",
"href": "http://255.255.255.255/",
"origin": "http://255.255.255.255",
"protocol": "http:",
"username": "",
"password": "",
"host": "255.255.255.255",
"hostname": "255.255.255.255",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
{
"input": "http://0xffffffff1",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://256.256.256.256",
"base": "http://other.com/",
"failure": true
},
{
"input": "http://256.256.256.256.256",
"base": "http://other.com/",
"href": "http://256.256.256.256.256/",
"origin": "http://256.256.256.256.256",
"protocol": "http:",
"username": "",
"password": "",
"host": "256.256.256.256.256",
"hostname": "256.256.256.256.256",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
},
"# file URLs relative to other file URLs (via https://github.com/jsdom/whatwg-url/pull/60)",
{
"input": "pix/submit.gif",
"base": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/anchor.html",
"href": "file:///C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/Users/Domenic/Dropbox/GitHub/tmpvar/jsdom/test/level2/html/files/pix/submit.gif",
"search": "",
"hash": ""
},
{
"input": "..",
"base": "file:///C:/",
"href": "file:///C:/",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/C:/",
"search": "",
"hash": ""
},
{
"input": "..",
"base": "file:///",
"href": "file:///",
"protocol": "file:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "/",
"search": "",
"hash": ""
}
]
]