Skip to content

Commit 841bb4c

Browse files
committed
url: fix C0 control and whitespace handling
PR-URL: #12846 Fixes: #12825 Refs: web-platform-tests/wpt#5792 Reviewed-By: James M Snell <[email protected]> Reviewed-By: Joyee Cheung <[email protected]>
1 parent 5254975 commit 841bb4c

File tree

3 files changed

+81
-27
lines changed

3 files changed

+81
-27
lines changed

src/node_url.cc

+39-20
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ enum url_error_cb_args {
133133
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
134134
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))
135135

136+
// https://infra.spec.whatwg.org/#c0-control-or-space
137+
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))
138+
136139
// https://infra.spec.whatwg.org/#ascii-digit
137140
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))
138141

@@ -1134,15 +1137,45 @@ static inline void ShortenUrlPath(struct url_data* url) {
11341137
}
11351138

11361139
void URL::Parse(const char* input,
1137-
const size_t len,
1140+
size_t len,
11381141
enum url_parse_state state_override,
11391142
struct url_data* url,
1143+
bool has_url,
11401144
const struct url_data* base,
11411145
bool has_base) {
1146+
const char* p = input;
1147+
const char* end = input + len;
1148+
1149+
if (!has_url) {
1150+
for (const char* ptr = p; ptr < end; ptr++) {
1151+
if (IsC0ControlOrSpace(*ptr))
1152+
p++;
1153+
else
1154+
break;
1155+
}
1156+
for (const char* ptr = end - 1; ptr >= p; ptr--) {
1157+
if (IsC0ControlOrSpace(*ptr))
1158+
end--;
1159+
else
1160+
break;
1161+
}
1162+
len = end - p;
1163+
}
1164+
1165+
std::string whitespace_stripped;
1166+
whitespace_stripped.reserve(len);
1167+
for (const char* ptr = p; ptr < end; ptr++)
1168+
if (!IsASCIITabOrNewline(*ptr))
1169+
whitespace_stripped += *ptr;
1170+
1171+
input = whitespace_stripped.c_str();
1172+
len = whitespace_stripped.size();
1173+
p = input;
1174+
end = input + len;
1175+
11421176
bool atflag = false;
11431177
bool sbflag = false;
11441178
bool uflag = false;
1145-
int wskip = 0;
11461179

11471180
std::string buffer;
11481181
url->scheme.reserve(len);
@@ -1159,9 +1192,6 @@ void URL::Parse(const char* input,
11591192
enum url_parse_state state = has_state_override ? state_override :
11601193
kSchemeStart;
11611194

1162-
const char* p = input;
1163-
const char* end = input + len;
1164-
11651195
if (state < kSchemeStart || state > kFragment) {
11661196
url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
11671197
return;
@@ -1171,18 +1201,6 @@ void URL::Parse(const char* input,
11711201
const char ch = p < end ? p[0] : kEOL;
11721202
const size_t remaining = end == p ? 0 : (end - p - 1);
11731203

1174-
if (IsASCIITabOrNewline(ch)) {
1175-
if (state == kAuthority) {
1176-
// It's necessary to keep track of how much whitespace
1177-
// is being ignored when in kAuthority state because of
1178-
// how the buffer is managed. TODO: See if there's a better
1179-
// way
1180-
wskip++;
1181-
}
1182-
p++;
1183-
continue;
1184-
}
1185-
11861204
bool special = (url->flags & URL_FLAGS_SPECIAL);
11871205
bool cannot_be_base;
11881206
const bool special_back_slash = (special && ch == '\\');
@@ -1500,7 +1518,7 @@ void URL::Parse(const char* input,
15001518
url->flags |= URL_FLAGS_FAILED;
15011519
return;
15021520
}
1503-
p -= buffer.size() + 1 + wskip;
1521+
p -= buffer.size() + 1;
15041522
buffer.clear();
15051523
state = kHost;
15061524
} else {
@@ -1892,16 +1910,17 @@ static void Parse(Environment* env,
18921910
HandleScope handle_scope(isolate);
18931911
Context::Scope context_scope(context);
18941912

1913+
const bool has_context = context_obj->IsObject();
18951914
const bool has_base = base_obj->IsObject();
18961915

18971916
struct url_data base;
18981917
struct url_data url;
1899-
if (context_obj->IsObject())
1918+
if (has_context)
19001919
HarvestContext(env, &url, context_obj.As<Object>());
19011920
if (has_base)
19021921
HarvestBase(env, &base, base_obj.As<Object>());
19031922

1904-
URL::Parse(input, len, state_override, &url, &base, has_base);
1923+
URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
19051924
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
19061925
((state_override != kUnknownState) &&
19071926
(url.flags & URL_FLAGS_TERMINATED)))

src/node_url.h

+11-6
Original file line numberDiff line numberDiff line change
@@ -81,30 +81,35 @@ struct url_data {
8181
class URL {
8282
public:
8383
static void Parse(const char* input,
84-
const size_t len,
84+
size_t len,
8585
enum url_parse_state state_override,
8686
struct url_data* url,
87+
bool has_url,
8788
const struct url_data* base,
8889
bool has_base);
8990

9091
URL(const char* input, const size_t len) {
91-
Parse(input, len, kUnknownState, &context_, nullptr, false);
92+
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
9293
}
9394

9495
URL(const char* input, const size_t len, const URL* base) {
9596
if (base != nullptr)
96-
Parse(input, len, kUnknownState, &context_, &(base->context_), true);
97+
Parse(input, len, kUnknownState,
98+
&context_, false,
99+
&(base->context_), true);
97100
else
98-
Parse(input, len, kUnknownState, &context_, nullptr, false);
101+
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
99102
}
100103

101104
URL(const char* input, const size_t len,
102105
const char* base, const size_t baselen) {
103106
if (base != nullptr && baselen > 0) {
104107
URL _base(base, baselen);
105-
Parse(input, len, kUnknownState, &context_, &(_base.context_), true);
108+
Parse(input, len, kUnknownState,
109+
&context_, false,
110+
&(_base.context_), true);
106111
} else {
107-
Parse(input, len, kUnknownState, &context_, nullptr, false);
112+
Parse(input, len, kUnknownState, &context_, false, nullptr, false);
108113
}
109114
}
110115

test/fixtures/url-tests.js

+31-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
'use strict';
22

33
/* WPT Refs:
4-
https://github.com/w3c/web-platform-tests/blob/28541bb/url/urltestdata.json
4+
https://github.com/w3c/web-platform-tests/blob/0f26c418a5/url/urltestdata.json
55
License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html
66
*/
77
module.exports =
@@ -3566,6 +3566,22 @@ module.exports =
35663566
"search": "",
35673567
"hash": ""
35683568
},
3569+
"Leading and trailing C0 control or space",
3570+
{
3571+
"input": "\u0000\u001b\u0004\u0012 http://example.com/\u001f \u000d ",
3572+
"base": "about:blank",
3573+
"href": "http://example.com/",
3574+
"origin": "http://example.com",
3575+
"protocol": "http:",
3576+
"username": "",
3577+
"password": "",
3578+
"host": "example.com",
3579+
"hostname": "example.com",
3580+
"port": "",
3581+
"pathname": "/",
3582+
"search": "",
3583+
"hash": ""
3584+
},
35693585
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
35703586
{
35713587
"input": "http://www.foo。bar.com",
@@ -5487,6 +5503,20 @@ module.exports =
54875503
"search": "",
54885504
"hash": ""
54895505
},
5506+
{
5507+
"input": "C|\n/",
5508+
"base": "file://host/dir/file",
5509+
"href": "file:///C:/",
5510+
"protocol": "file:",
5511+
"username": "",
5512+
"password": "",
5513+
"host": "",
5514+
"hostname": "",
5515+
"port": "",
5516+
"pathname": "/C:/",
5517+
"search": "",
5518+
"hash": ""
5519+
},
54905520
{
54915521
"input": "C|\\",
54925522
"base": "file://host/dir/file",

0 commit comments

Comments
 (0)