Skip to content

Commit ba817d3

Browse files
TimothyGu authored and jasnell committed
url: update IDNA handling
Remove custom tests for invalid IDNA domains in url-idna.js in favor of the more comprehensive official set. PR-URL: #13362 Refs: whatwg/url#309 Refs: web-platform-tests/wpt#5976 Reviewed-By: Refael Ackermann <[email protected]> Reviewed-By: James M Snell <[email protected]> Reviewed-By: Daijiro Wachi <[email protected]>
1 parent a117bcc commit ba817d3

7 files changed

+543
-253
lines changed

src/node_i18n.cc

+35-19
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,9 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
450450
&info,
451451
&status);
452452

453+
// Do not check info.errors like we do with ToASCII since ToUnicode always
454+
// returns a string, despite any possible errors that may have occurred.
455+
453456
if (status == U_BUFFER_OVERFLOW_ERROR) {
454457
status = U_ZERO_ERROR;
455458
buf->AllocateSufficientStorage(len);
@@ -477,9 +480,18 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
477480
int32_t ToASCII(MaybeStackBuffer<char>* buf,
478481
const char* input,
479482
size_t length,
480-
bool lenient) {
483+
enum idna_mode mode) {
481484
UErrorCode status = U_ZERO_ERROR;
482-
uint32_t options = UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI;
485+
uint32_t options = // CheckHyphens = false; handled later
486+
UIDNA_CHECK_BIDI | // CheckBidi = true
487+
UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
488+
UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
489+
if (mode == IDNA_STRICT) {
490+
options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
491+
// VerifyDnsLength = beStrict;
492+
// handled later
493+
}
494+
483495
UIDNA* uidna = uidna_openUTS46(options, &status);
484496
if (U_FAILURE(status))
485497
return -1;
@@ -501,21 +513,17 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
501513
&status);
502514
}
503515

504-
// The WHATWG URL "domain to ASCII" algorithm explicitly sets the
505-
// VerifyDnsLength flag to false, which disables the domain name length
506-
// verification step in ToASCII (as specified by UTS #46). Unfortunately,
507-
// ICU4C's IDNA module does not support disabling this flag through `options`,
508-
// so just filter out the errors that may be caused by the verification step
509-
// afterwards.
510-
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
511-
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
512-
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
513-
514-
// These error conditions are mandated unconditionally by UTS #46 version
515-
// 9.0.0 (rev. 17), but were found to be incompatible with actual domain
516-
// names in the wild. As such, in the current UTS #46 draft (rev. 18) these
517-
// checks are made optional depending on the CheckHyphens flag, which will be
518-
// disabled in WHATWG URL's "domain to ASCII" algorithm soon.
516+
// In UTS #46 which specifies ToASCII, certain error conditions are
517+
// configurable through options, and the WHATWG URL Standard promptly elects
518+
// to disable some of them to accommodate real-world use cases.
519+
// Unfortunately, ICU4C's IDNA module does not support disabling some of
520+
// these options through `options` above, and thus continues throwing
521+
// unnecessary errors. To counter this situation, we just filter out the
522+
// errors that may have happened afterwards, before deciding whether to
523+
// return an error from this function.
524+
525+
// CheckHyphens = false
526+
// (Specified in the current UTS #46 draft rev. 18.)
519527
// Refs:
520528
// - https://github.com/whatwg/url/issues/53
521529
// - https://github.com/whatwg/url/pull/309
@@ -526,7 +534,14 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
526534
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
527535
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
528536

529-
if (U_FAILURE(status) || (!lenient && info.errors != 0)) {
537+
if (mode != IDNA_STRICT) {
538+
// VerifyDnsLength = beStrict
539+
info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
540+
info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
541+
info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
542+
}
543+
544+
if (U_FAILURE(status) || (mode != IDNA_LENIENT && info.errors != 0)) {
530545
len = -1;
531546
buf->SetLength(0);
532547
} else {
@@ -564,9 +579,10 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
564579
Utf8Value val(env->isolate(), args[0]);
565580
// optional arg
566581
bool lenient = args[1]->BooleanValue(env->context()).FromJust();
582+
enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
567583

568584
MaybeStackBuffer<char> buf;
569-
int32_t len = ToASCII(&buf, *val, val.length(), lenient);
585+
int32_t len = ToASCII(&buf, *val, val.length(), mode);
570586

571587
if (len < 0) {
572588
return env->ThrowError("Cannot convert name to ASCII");

src/node_i18n.h

+17-1
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,26 @@ namespace i18n {
3737

3838
bool InitializeICUDirectory(const std::string& path);
3939

40+
// Selects how strictly ToASCII() validates a domain name during IDNA
// conversion.
enum idna_mode {
41+
// Default mode for maximum compatibility. STD3 rules are not enforced and
// DNS length errors (empty label, label too long, domain name too long) are
// filtered out; any error ToASCII() does not filter out still makes it fail.
42+
IDNA_DEFAULT,
43+
// Ignore all errors in IDNA conversion, if possible. Errors flagged by the
// conversion itself do not make ToASCII() fail; a failing ICU status still
// does.
44+
IDNA_LENIENT,
45+
// Enforce STD3 rules (UseSTD3ASCIIRules) and DNS length restrictions
46+
// (VerifyDnsLength). Corresponds to the `beStrict` flag in the WHATWG URL
47+
// Standard "domain to ASCII" algorithm.
48+
IDNA_STRICT
49+
};
50+
51+
// Implements the WHATWG URL Standard "domain to ASCII" algorithm.
52+
// https://url.spec.whatwg.org/#concept-domain-to-ascii
4053
int32_t ToASCII(MaybeStackBuffer<char>* buf,
4154
const char* input,
4255
size_t length,
43-
bool lenient = false);
56+
enum idna_mode mode = IDNA_DEFAULT);
57+
58+
// Implements the WHATWG URL Standard "domain to Unicode" algorithm.
59+
// https://url.spec.whatwg.org/#concept-domain-to-unicode
4460
int32_t ToUnicode(MaybeStackBuffer<char>* buf,
4561
const char* input,
4662
size_t length);

0 commit comments

Comments
 (0)