src: remove explicit UTF-8 validity check in url

TimothyGu · TimothyGu · commit d099f8e317a6 · 2017-03-16T16:39:39.000-07:00
This step was never part of the URL Standard's host parser algorithm, and it became unnecessary once IDNA conversion errors stopped being ignored. PR-URL: #11859 Refs: c2a302c "src: do not ignore IDNA conversion error" Refs: https://url.spec.whatwg.org/#concept-host-parser Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Daijiro Wachi <daijiro.wachi@gmail.com> Reviewed-By: Anna Henningsen <anna@addaleax.net> Reviewed-By: James M Snell <jasnell@gmail.com> Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
diff --git a/src/node_url.cc b/src/node_url.cc
@@ -15,11 +15,6 @@
 #include <stdio.h>
 #include <cmath>
 
-#if defined(NODE_HAVE_I18N_SUPPORT)
-#include <unicode/utf8.h>
-#include <unicode/utf.h>
-#endif
-
 #define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
 
 namespace node {
@@ -113,21 +108,6 @@ namespace url {
     output->assign(*buf, buf.length());
     return true;
   }
-
-  // Unfortunately there's not really a better way to do this.
-  // Iterate through each encoded codepoint and verify that
-  // it is a valid unicode codepoint.
-  static bool IsValidUTF8(std::string* input) {
-    const char* p = input->c_str();
-    int32_t len = input->length();
-    for (int32_t i = 0; i < len;) {
-      UChar32 c;
-      U8_NEXT_UNSAFE(p, i, c);
-      if (!U_IS_UNICODE_CHAR(c))
-        return false;
-    }
-    return true;
-  }
 #else
   // Intentional non-ops if ICU is not present.
   static bool ToUnicode(std::string* input, std::string* output) {
@@ -139,10 +119,6 @@ namespace url {
     *output = *input;
     return true;
   }
-
-  static bool IsValidUTF8(std::string* input) {
-    return true;
-  }
 #endif
 
   // If a UTF-16 character is a low/trailing surrogate.
@@ -395,12 +371,6 @@ namespace url {
     if (PercentDecode(input, length, &decoded) < 0)
       goto end;
 
-    // If there are any invalid UTF8 byte sequences, we have to fail.
-    // Unfortunately this means iterating through the string and checking
-    // each decoded codepoint.
-    if (!IsValidUTF8(&decoded))
-      goto end;
-
     // Then we have to punycode toASCII
     if (!ToASCII(&decoded, &decoded))
       goto end;