Skip to content

Commit 72a3cac

Browse files
TimothyGu authored and evanlucas committed
src: remove explicit UTF-8 validity check in url
This step was never part of the URL Standard's host parser algorithm, and is rendered unnecessary after IDNA errors are no longer ignored.

PR-URL: #12507
Refs: c2a302c "src: do not ignore IDNA conversion error"
Reviewed-By: James M Snell <[email protected]>
1 parent b8ff2c9 commit 72a3cac

File tree

1 file changed

+0
-30
lines changed

1 file changed

+0
-30
lines changed

src/node_url.cc

-30
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,6 @@
1515
#include <stdio.h>
1616
#include <cmath>
1717

18-
#if defined(NODE_HAVE_I18N_SUPPORT)
19-
#include <unicode/utf8.h>
20-
#include <unicode/utf.h>
21-
#endif
22-
2318
#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
2419

2520
namespace node {
@@ -74,21 +69,6 @@ namespace url {
7469
output->assign(*buf, buf.length());
7570
return true;
7671
}
77-
78-
// Unfortunately there's not really a better way to do this.
79-
// Iterate through each encoded codepoint and verify that
80-
// it is a valid unicode codepoint.
81-
static bool IsValidUTF8(std::string* input) {
82-
const char* p = input->c_str();
83-
int32_t len = input->length();
84-
for (int32_t i = 0; i < len;) {
85-
UChar32 c;
86-
U8_NEXT_UNSAFE(p, i, c);
87-
if (!U_IS_UNICODE_CHAR(c))
88-
return false;
89-
}
90-
return true;
91-
}
9272
#else
9373
// Intentional non-ops if ICU is not present.
9474
static inline bool ToUnicode(std::string* input, std::string* output) {
@@ -100,10 +80,6 @@ namespace url {
10080
*output = *input;
10181
return true;
10282
}
103-
104-
static bool IsValidUTF8(std::string* input) {
105-
return true;
106-
}
10783
#endif
10884

10985
// If a UTF-16 character is a low/trailing surrogate.
@@ -355,12 +331,6 @@ namespace url {
355331
// First, we have to percent decode
356332
PercentDecode(input, length, &decoded);
357333

358-
// If there are any invalid UTF8 byte sequences, we have to fail.
359-
// Unfortunately this means iterating through the string and checking
360-
// each decoded codepoint.
361-
if (!IsValidUTF8(&decoded))
362-
goto end;
363-
364334
// Then we have to punycode toASCII
365335
if (!ToASCII(&decoded, &decoded))
366336
goto end;

0 commit comments

Comments
 (0)