@@ -133,6 +133,9 @@ enum url_error_cb_args {
133
133
// https://infra.spec.whatwg.org/#ascii-tab-or-newline
134
134
CHAR_TEST (8 , IsASCIITabOrNewline, (ch == ' \t ' || ch == ' \n ' || ch == ' \r ' ))
135
135
136
+ // https://infra.spec.whatwg.org/#c0-control-or-space
137
+ CHAR_TEST (8 , IsC0ControlOrSpace, (ch >= ' \0 ' && ch <= ' ' ))
138
+
136
139
// https://infra.spec.whatwg.org/#ascii-digit
137
140
CHAR_TEST (8 , IsASCIIDigit, (ch >= ' 0' && ch <= ' 9' ))
138
141
@@ -1134,15 +1137,45 @@ static inline void ShortenUrlPath(struct url_data* url) {
1134
1137
}
1135
1138
1136
1139
void URL::Parse (const char * input,
1137
- const size_t len,
1140
+ size_t len,
1138
1141
enum url_parse_state state_override,
1139
1142
struct url_data * url,
1143
+ bool has_url,
1140
1144
const struct url_data * base,
1141
1145
bool has_base) {
1146
+ const char * p = input;
1147
+ const char * end = input + len;
1148
+
1149
+ if (!has_url) {
1150
+ for (const char * ptr = p; ptr < end; ptr++) {
1151
+ if (IsC0ControlOrSpace (*ptr))
1152
+ p++;
1153
+ else
1154
+ break ;
1155
+ }
1156
+ for (const char * ptr = end - 1 ; ptr >= p; ptr--) {
1157
+ if (IsC0ControlOrSpace (*ptr))
1158
+ end--;
1159
+ else
1160
+ break ;
1161
+ }
1162
+ len = end - p;
1163
+ }
1164
+
1165
+ std::string whitespace_stripped;
1166
+ whitespace_stripped.reserve (len);
1167
+ for (const char * ptr = p; ptr < end; ptr++)
1168
+ if (!IsASCIITabOrNewline (*ptr))
1169
+ whitespace_stripped += *ptr;
1170
+
1171
+ input = whitespace_stripped.c_str ();
1172
+ len = whitespace_stripped.size ();
1173
+ p = input;
1174
+ end = input + len;
1175
+
1142
1176
bool atflag = false ;
1143
1177
bool sbflag = false ;
1144
1178
bool uflag = false ;
1145
- int wskip = 0 ;
1146
1179
1147
1180
std::string buffer;
1148
1181
url->scheme .reserve (len);
@@ -1159,9 +1192,6 @@ void URL::Parse(const char* input,
1159
1192
enum url_parse_state state = has_state_override ? state_override :
1160
1193
kSchemeStart ;
1161
1194
1162
- const char * p = input;
1163
- const char * end = input + len;
1164
-
1165
1195
if (state < kSchemeStart || state > kFragment ) {
1166
1196
url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1167
1197
return ;
@@ -1171,18 +1201,6 @@ void URL::Parse(const char* input,
1171
1201
const char ch = p < end ? p[0 ] : kEOL ;
1172
1202
const size_t remaining = end == p ? 0 : (end - p - 1 );
1173
1203
1174
- if (IsASCIITabOrNewline (ch)) {
1175
- if (state == kAuthority ) {
1176
- // It's necessary to keep track of how much whitespace
1177
- // is being ignored when in kAuthority state because of
1178
- // how the buffer is managed. TODO: See if there's a better
1179
- // way
1180
- wskip++;
1181
- }
1182
- p++;
1183
- continue ;
1184
- }
1185
-
1186
1204
bool special = (url->flags & URL_FLAGS_SPECIAL);
1187
1205
bool cannot_be_base;
1188
1206
const bool special_back_slash = (special && ch == ' \\ ' );
@@ -1500,7 +1518,7 @@ void URL::Parse(const char* input,
1500
1518
url->flags |= URL_FLAGS_FAILED;
1501
1519
return ;
1502
1520
}
1503
- p -= buffer.size () + 1 + wskip ;
1521
+ p -= buffer.size () + 1 ;
1504
1522
buffer.clear ();
1505
1523
state = kHost ;
1506
1524
} else {
@@ -1892,16 +1910,17 @@ static void Parse(Environment* env,
1892
1910
HandleScope handle_scope (isolate);
1893
1911
Context::Scope context_scope (context);
1894
1912
1913
+ const bool has_context = context_obj->IsObject ();
1895
1914
const bool has_base = base_obj->IsObject ();
1896
1915
1897
1916
struct url_data base;
1898
1917
struct url_data url;
1899
- if (context_obj-> IsObject () )
1918
+ if (has_context )
1900
1919
HarvestContext (env, &url, context_obj.As <Object>());
1901
1920
if (has_base)
1902
1921
HarvestBase (env, &base, base_obj.As <Object>());
1903
1922
1904
- URL::Parse (input, len, state_override, &url, &base, has_base);
1923
+ URL::Parse (input, len, state_override, &url, has_context, &base, has_base);
1905
1924
if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
1906
1925
((state_override != kUnknownState ) &&
1907
1926
(url.flags & URL_FLAGS_TERMINATED)))
0 commit comments