@@ -450,6 +450,9 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
450
450
&info,
451
451
&status);
452
452
453
+ // Do not check info.errors like we do with ToASCII since ToUnicode always
454
+ // returns a string, despite any possible errors that may have occurred.
455
+
453
456
if (status == U_BUFFER_OVERFLOW_ERROR) {
454
457
status = U_ZERO_ERROR;
455
458
buf->AllocateSufficientStorage (len);
@@ -477,9 +480,18 @@ int32_t ToUnicode(MaybeStackBuffer<char>* buf,
477
480
int32_t ToASCII (MaybeStackBuffer<char >* buf,
478
481
const char * input,
479
482
size_t length,
480
- bool lenient ) {
483
+ enum idna_mode mode ) {
481
484
UErrorCode status = U_ZERO_ERROR;
482
- uint32_t options = UIDNA_NONTRANSITIONAL_TO_ASCII | UIDNA_CHECK_BIDI;
485
+ uint32_t options = // CheckHyphens = false; handled later
486
+ UIDNA_CHECK_BIDI | // CheckBidi = true
487
+ UIDNA_CHECK_CONTEXTJ | // CheckJoiners = true
488
+ UIDNA_NONTRANSITIONAL_TO_ASCII; // Nontransitional_Processing
489
+ if (mode == IDNA_STRICT) {
490
+ options |= UIDNA_USE_STD3_RULES; // UseSTD3ASCIIRules = beStrict
491
+ // VerifyDnsLength = beStrict;
492
+ // handled later
493
+ }
494
+
483
495
UIDNA* uidna = uidna_openUTS46 (options, &status);
484
496
if (U_FAILURE (status))
485
497
return -1 ;
@@ -501,21 +513,17 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
501
513
&status);
502
514
}
503
515
504
- // The WHATWG URL "domain to ASCII" algorithm explicitly sets the
505
- // VerifyDnsLength flag to false, which disables the domain name length
506
- // verification step in ToASCII (as specified by UTS #46). Unfortunately,
507
- // ICU4C's IDNA module does not support disabling this flag through `options`,
508
- // so just filter out the errors that may be caused by the verification step
509
- // afterwards.
510
- info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
511
- info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
512
- info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
513
-
514
- // These error conditions are mandated unconditionally by UTS #46 version
515
- // 9.0.0 (rev. 17), but were found to be incompatible with actual domain
516
- // names in the wild. As such, in the current UTS #46 draft (rev. 18) these
517
- // checks are made optional depending on the CheckHyphens flag, which will be
518
- // disabled in WHATWG URL's "domain to ASCII" algorithm soon.
516
+ // In UTS #46 which specifies ToASCII, certain error conditions are
517
+ // configurable through options, and the WHATWG URL Standard promptly elects
518
+ // to disable some of them to accommodate real-world use cases.
519
+ // Unfortunately, ICU4C's IDNA module does not support disabling some of
520
+ // these options through `options` above, and thus continues throwing
521
+ // unnecessary errors. To counter this situation, we just filter out the
522
+ // errors that may have happened afterwards, before deciding whether to
523
+ // return an error from this function.
524
+
525
+ // CheckHyphens = false
526
+ // (Specified in the current UTS #46 draft rev. 18.)
519
527
// Refs:
520
528
// - https://github.com/whatwg/url/issues/53
521
529
// - https://github.com/whatwg/url/pull/309
@@ -526,7 +534,14 @@ int32_t ToASCII(MaybeStackBuffer<char>* buf,
526
534
info.errors &= ~UIDNA_ERROR_LEADING_HYPHEN;
527
535
info.errors &= ~UIDNA_ERROR_TRAILING_HYPHEN;
528
536
529
- if (U_FAILURE (status) || (!lenient && info.errors != 0 )) {
537
+ if (mode != IDNA_STRICT) {
538
+ // VerifyDnsLength = beStrict
539
+ info.errors &= ~UIDNA_ERROR_EMPTY_LABEL;
540
+ info.errors &= ~UIDNA_ERROR_LABEL_TOO_LONG;
541
+ info.errors &= ~UIDNA_ERROR_DOMAIN_NAME_TOO_LONG;
542
+ }
543
+
544
+ if (U_FAILURE (status) || (mode != IDNA_LENIENT && info.errors != 0 )) {
530
545
len = -1 ;
531
546
buf->SetLength (0 );
532
547
} else {
@@ -564,9 +579,10 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
564
579
Utf8Value val (env->isolate (), args[0 ]);
565
580
// optional arg
566
581
bool lenient = args[1 ]->BooleanValue (env->context ()).FromJust ();
582
+ enum idna_mode mode = lenient ? IDNA_LENIENT : IDNA_DEFAULT;
567
583
568
584
MaybeStackBuffer<char > buf;
569
- int32_t len = ToASCII (&buf, *val, val.length (), lenient );
585
+ int32_t len = ToASCII (&buf, *val, val.length (), mode );
570
586
571
587
if (len < 0 ) {
572
588
return env->ThrowError (" Cannot convert name to ASCII" );
0 commit comments