1
- /* auto-generated on 2023-04-20 18:39:35 -0400. Do not edit! */
1
+ /* auto-generated on 2023-04-26 16:43:37 -0400. Do not edit! */
2
2
/* begin file src/ada.cpp */
3
3
#include "ada.h"
4
4
/* begin file src/checkers.cpp */
@@ -116,12 +116,13 @@ ada_really_inline constexpr bool verify_dns_length(
116
116
117
117
ADA_PUSH_DISABLE_ALL_WARNINGS
118
118
/* begin file src/ada_idna.cpp */
119
- /* auto-generated on 2023-03-28 11:03:13 -0400. Do not edit! */
119
+ /* auto-generated on 2023-04-26 14:14:42 -0400. Do not edit! */
120
120
/* begin file src/idna.cpp */
121
121
/* begin file src/unicode_transcoding.cpp */
122
122
123
123
#include <cstdint>
124
124
#include <cstring>
125
+
125
126
namespace ada::idna {
126
127
127
128
size_t utf8_to_utf32(const char* buf, size_t len, char32_t* utf32_output) {
@@ -2750,7 +2751,9 @@ uint32_t find_range_index(uint32_t key) {
2750
2751
}
2751
2752
2752
2753
bool ascii_has_upper_case(char* input, size_t length) {
2753
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
2754
+ auto broadcast = [](uint8_t v) -> uint64_t {
2755
+ return 0x101010101010101ull * v;
2756
+ };
2754
2757
uint64_t broadcast_80 = broadcast(0x80);
2755
2758
uint64_t broadcast_Ap = broadcast(128 - 'A');
2756
2759
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -2772,7 +2775,9 @@ bool ascii_has_upper_case(char* input, size_t length) {
2772
2775
}
2773
2776
2774
2777
void ascii_map(char* input, size_t length) {
2775
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
2778
+ auto broadcast = [](uint8_t v) -> uint64_t {
2779
+ return 0x101010101010101ull * v;
2780
+ };
2776
2781
uint64_t broadcast_80 = broadcast(0x80);
2777
2782
uint64_t broadcast_Ap = broadcast(128 - 'A');
2778
2783
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -7999,9 +8004,10 @@ const char32_t uninorms::decomposition_data[] = {
7999
8004
namespace ada::idna {
8000
8005
8001
8006
void normalize(std::u32string& input) {
8002
- // [Normalize](https://www.unicode.org/reports/tr46/#ProcessingStepNormalize).
8003
- // Normalize
8004
- // the domain_name string to Unicode Normalization Form C.
8007
+ /**
8008
+ * Normalize the domain_name string to Unicode Normalization Form C.
8009
+ * @see https://www.unicode.org/reports/tr46/#ProcessingStepNormalize
8010
+ */
8005
8011
ufal::unilib::uninorms::nfc(input);
8006
8012
}
8007
8013
@@ -8229,7 +8235,6 @@ bool utf32_to_punycode(std::u32string_view input, std::string &out) {
8229
8235
} // namespace ada::idna
8230
8236
/* end file src/punycode.cpp */
8231
8237
/* begin file src/validity.cpp */
8232
-
8233
8238
#include <algorithm>
8234
8239
#include <string_view>
8235
8240
@@ -9617,18 +9622,18 @@ constexpr static uint8_t is_forbidden_domain_code_point_table[] = {
9617
9622
9618
9623
static_assert(sizeof(is_forbidden_domain_code_point_table) == 256);
9619
9624
9620
- inline constexpr bool is_forbidden_domain_code_point(const char c) noexcept {
9625
+ inline bool is_forbidden_domain_code_point(const char c) noexcept {
9621
9626
return is_forbidden_domain_code_point_table[uint8_t(c)];
9622
9627
}
9623
9628
9624
- // We return "" on error. For now.
9625
- std::string from_ascii_to_ascii(std::string_view ut8_string) {
9626
- static const std::string error = "";
9627
- if (std::any_of(ut8_string.begin(), ut8_string.end(),
9628
- is_forbidden_domain_code_point)) {
9629
- return error;
9630
- }
9629
+ bool contains_forbidden_domain_code_point(std::string_view view) {
9630
+ return (
9631
+ std::any_of(view.begin(), view.end(), is_forbidden_domain_code_point));
9632
+ }
9631
9633
9634
+ // We return "" on error.
9635
+ static std::string from_ascii_to_ascii(std::string_view ut8_string) {
9636
+ static const std::string error = "";
9632
9637
// copy and map
9633
9638
// we could be more efficient by avoiding the copy when unnecessary.
9634
9639
std::string mapped_string = std::string(ut8_string);
@@ -9682,7 +9687,7 @@ std::string from_ascii_to_ascii(std::string_view ut8_string) {
9682
9687
return out;
9683
9688
}
9684
9689
9685
- // We return "" on error. For now.
9690
+ // We return "" on error.
9686
9691
std::string to_ascii(std::string_view ut8_string) {
9687
9692
if (is_ascii(ut8_string)) {
9688
9693
return from_ascii_to_ascii(ut8_string);
@@ -9769,11 +9774,6 @@ std::string to_ascii(std::string_view ut8_string) {
9769
9774
out.push_back('.');
9770
9775
}
9771
9776
}
9772
-
9773
- if (std::any_of(out.begin(), out.end(), is_forbidden_domain_code_point)) {
9774
- return error;
9775
- }
9776
-
9777
9777
return out;
9778
9778
}
9779
9779
} // namespace ada::idna
@@ -9842,7 +9842,9 @@ ADA_POP_DISABLE_WARNINGS
9842
9842
namespace ada::unicode {
9843
9843
9844
9844
constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
9845
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
9845
+ auto broadcast = [](uint8_t v) -> uint64_t {
9846
+ return 0x101010101010101ull * v;
9847
+ };
9846
9848
uint64_t broadcast_80 = broadcast(0x80);
9847
9849
uint64_t broadcast_Ap = broadcast(128 - 'A');
9848
9850
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
@@ -9873,7 +9875,9 @@ ada_really_inline constexpr bool has_tabs_or_newline(
9873
9875
auto has_zero_byte = [](uint64_t v) {
9874
9876
return ((v - 0x0101010101010101) & ~(v)&0x8080808080808080);
9875
9877
};
9876
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
9878
+ auto broadcast = [](uint8_t v) -> uint64_t {
9879
+ return 0x101010101010101ull * v;
9880
+ };
9877
9881
size_t i = 0;
9878
9882
uint64_t mask1 = broadcast('\r');
9879
9883
uint64_t mask2 = broadcast('\n');
@@ -10252,7 +10256,8 @@ bool to_ascii(std::optional<std::string>& out, const std::string_view plain,
10252
10256
}
10253
10257
// input is a non-empty UTF-8 string, must be percent decoded
10254
10258
std::string idna_ascii = ada::idna::to_ascii(input);
10255
- if (idna_ascii.empty()) {
10259
+ if (idna_ascii.empty() || contains_forbidden_domain_code_point(
10260
+ idna_ascii.data(), idna_ascii.size())) {
10256
10261
return false;
10257
10262
}
10258
10263
out = std::move(idna_ascii);
@@ -10627,7 +10632,9 @@ ada_really_inline size_t find_next_host_delimiter_special(
10627
10632
auto index_of_first_set_byte = [](uint64_t v) {
10628
10633
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
10629
10634
};
10630
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
10635
+ auto broadcast = [](uint8_t v) -> uint64_t {
10636
+ return 0x101010101010101ull * v;
10637
+ };
10631
10638
size_t i = location;
10632
10639
uint64_t mask1 = broadcast(':');
10633
10640
uint64_t mask2 = broadcast('/');
@@ -10690,7 +10697,9 @@ ada_really_inline size_t find_next_host_delimiter(std::string_view view,
10690
10697
auto index_of_first_set_byte = [](uint64_t v) {
10691
10698
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
10692
10699
};
10693
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
10700
+ auto broadcast = [](uint8_t v) -> uint64_t {
10701
+ return 0x101010101010101ull * v;
10702
+ };
10694
10703
size_t i = location;
10695
10704
uint64_t mask1 = broadcast(':');
10696
10705
uint64_t mask2 = broadcast('/');
@@ -11016,7 +11025,9 @@ find_authority_delimiter_special(std::string_view view) noexcept {
11016
11025
auto index_of_first_set_byte = [](uint64_t v) {
11017
11026
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
11018
11027
};
11019
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
11028
+ auto broadcast = [](uint8_t v) -> uint64_t {
11029
+ return 0x101010101010101ull * v;
11030
+ };
11020
11031
size_t i = 0;
11021
11032
uint64_t mask1 = broadcast('@');
11022
11033
uint64_t mask2 = broadcast('/');
@@ -11064,7 +11075,9 @@ find_authority_delimiter(std::string_view view) noexcept {
11064
11075
auto index_of_first_set_byte = [](uint64_t v) {
11065
11076
return ((((v - 1) & 0x101010101010101) * 0x101010101010101) >> 56) - 1;
11066
11077
};
11067
- auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
11078
+ auto broadcast = [](uint8_t v) -> uint64_t {
11079
+ return 0x101010101010101ull * v;
11080
+ };
11068
11081
size_t i = 0;
11069
11082
uint64_t mask1 = broadcast('@');
11070
11083
uint64_t mask2 = broadcast('/');
0 commit comments