Skip to content

Commit d099f8e

Browse files
committed
src: remove explicit UTF-8 validity check in url
This step was never part of the URL Standard's host parser algorithm, and it became unnecessary once IDNA conversion errors were no longer ignored.

PR-URL: #11859
Refs: c2a302c "src: do not ignore IDNA conversion error"
Refs: https://url.spec.whatwg.org/#concept-host-parser
Reviewed-By: Ben Noordhuis <[email protected]>
Reviewed-By: Daijiro Wachi <[email protected]>
Reviewed-By: Anna Henningsen <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Colin Ihrig <[email protected]>
1 parent 4cdb0e8 commit d099f8e

File tree

1 file changed

+0
-30
lines changed

1 file changed

+0
-30
lines changed

src/node_url.cc

-30
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,6 @@
1515
#include <stdio.h>
1616
#include <cmath>
1717

18-
#if defined(NODE_HAVE_I18N_SUPPORT)
19-
#include <unicode/utf8.h>
20-
#include <unicode/utf.h>
21-
#endif
22-
2318
#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
2419

2520
namespace node {
@@ -113,21 +108,6 @@ namespace url {
113108
output->assign(*buf, buf.length());
114109
return true;
115110
}
116-
117-
// Unfortunately there's not really a better way to do this.
118-
// Iterate through each encoded codepoint and verify that
119-
// it is a valid unicode codepoint.
120-
static bool IsValidUTF8(std::string* input) {
121-
const char* p = input->c_str();
122-
int32_t len = input->length();
123-
for (int32_t i = 0; i < len;) {
124-
UChar32 c;
125-
U8_NEXT_UNSAFE(p, i, c);
126-
if (!U_IS_UNICODE_CHAR(c))
127-
return false;
128-
}
129-
return true;
130-
}
131111
#else
132112
// Intentional non-ops if ICU is not present.
133113
static bool ToUnicode(std::string* input, std::string* output) {
@@ -139,10 +119,6 @@ namespace url {
139119
*output = *input;
140120
return true;
141121
}
142-
143-
static bool IsValidUTF8(std::string* input) {
144-
return true;
145-
}
146122
#endif
147123

148124
// If a UTF-16 character is a low/trailing surrogate.
@@ -395,12 +371,6 @@ namespace url {
395371
if (PercentDecode(input, length, &decoded) < 0)
396372
goto end;
397373

398-
// If there are any invalid UTF8 byte sequences, we have to fail.
399-
// Unfortunately this means iterating through the string and checking
400-
// each decoded codepoint.
401-
if (!IsValidUTF8(&decoded))
402-
goto end;
403-
404374
// Then we have to punycode toASCII
405375
if (!ToASCII(&decoded, &decoded))
406376
goto end;

0 commit comments

Comments
 (0)