19
19
20
20
#include "uv.h"
21
21
#include "idna.h"
22
+ #include <assert.h>
22
23
#include <string.h>
23
24
24
25
static unsigned uv__utf8_decode1_slow (const char * * p ,
@@ -32,7 +33,7 @@ static unsigned uv__utf8_decode1_slow(const char** p,
32
33
if (a > 0xF7 )
33
34
return -1 ;
34
35
35
- switch (* p - pe ) {
36
+ switch (pe - * p ) {
36
37
default :
37
38
if (a > 0xEF ) {
38
39
min = 0x10000 ;
@@ -62,6 +63,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
62
63
a = 0 ;
63
64
break ;
64
65
}
66
+ /* Fall through. */
67
+ case 0 :
65
68
return -1 ; /* Invalid continuation byte. */
66
69
}
67
70
@@ -88,6 +91,8 @@ static unsigned uv__utf8_decode1_slow(const char** p,
88
91
unsigned uv__utf8_decode1 (const char * * p , const char * pe ) {
89
92
unsigned a ;
90
93
94
+ assert (* p < pe );
95
+
91
96
a = (unsigned char ) * (* p )++ ;
92
97
93
98
if (a < 128 )
@@ -96,9 +101,6 @@ unsigned uv__utf8_decode1(const char** p, const char* pe) {
96
101
return uv__utf8_decode1_slow (p , pe , a );
97
102
}
98
103
99
- #define foreach_codepoint (c , p , pe ) \
100
- for (; (void) (*p <= pe && (c = uv__utf8_decode1(p, pe))), *p <= pe;)
101
-
102
104
static int uv__idna_toascii_label (const char * s , const char * se ,
103
105
char * * d , char * de ) {
104
106
static const char alphabet [] = "abcdefghijklmnopqrstuvwxyz0123456789" ;
@@ -121,25 +123,36 @@ static int uv__idna_toascii_label(const char* s, const char* se,
121
123
ss = s ;
122
124
todo = 0 ;
123
125
124
- foreach_codepoint (c , & s , se ) {
126
+ /* Note: after this loop we've visited all UTF-8 characters and know
127
+ * they're legal so we no longer need to check for decode errors.
128
+ */
129
+ while (s < se ) {
130
+ c = uv__utf8_decode1 (& s , se );
131
+
132
+ if (c == -1u )
133
+ return UV_EINVAL ;
134
+
125
135
if (c < 128 )
126
136
h ++ ;
127
- else if (c == (unsigned ) -1 )
128
- return UV_EINVAL ;
129
137
else
130
138
todo ++ ;
131
139
}
132
140
141
+ /* Only write "xn--" when there are non-ASCII characters. */
133
142
if (todo > 0 ) {
134
143
if (* d < de ) * (* d )++ = 'x' ;
135
144
if (* d < de ) * (* d )++ = 'n' ;
136
145
if (* d < de ) * (* d )++ = '-' ;
137
146
if (* d < de ) * (* d )++ = '-' ;
138
147
}
139
148
149
+ /* Write ASCII characters. */
140
150
x = 0 ;
141
151
s = ss ;
142
- foreach_codepoint (c , & s , se ) {
152
+ while (s < se ) {
153
+ c = uv__utf8_decode1 (& s , se );
154
+ assert (c != -1u );
155
+
143
156
if (c > 127 )
144
157
continue ;
145
158
@@ -166,10 +179,15 @@ static int uv__idna_toascii_label(const char* s, const char* se,
166
179
while (todo > 0 ) {
167
180
m = -1 ;
168
181
s = ss ;
169
- foreach_codepoint (c , & s , se )
182
+
183
+ while (s < se ) {
184
+ c = uv__utf8_decode1 (& s , se );
185
+ assert (c != -1u );
186
+
170
187
if (c >= n )
171
188
if (c < m )
172
189
m = c ;
190
+ }
173
191
174
192
x = m - n ;
175
193
y = h + 1 ;
@@ -181,7 +199,10 @@ static int uv__idna_toascii_label(const char* s, const char* se,
181
199
n = m ;
182
200
183
201
s = ss ;
184
- foreach_codepoint (c , & s , se ) {
202
+ while (s < se ) {
203
+ c = uv__utf8_decode1 (& s , se );
204
+ assert (c != -1u );
205
+
185
206
if (c < n )
186
207
if (++ delta == 0 )
187
208
return UV_E2BIG ; /* Overflow. */
@@ -245,8 +266,6 @@ static int uv__idna_toascii_label(const char* s, const char* se,
245
266
return 0 ;
246
267
}
247
268
248
- #undef foreach_codepoint
249
-
250
269
long uv__idna_toascii (const char * s , const char * se , char * d , char * de ) {
251
270
const char * si ;
252
271
const char * st ;
@@ -256,10 +275,14 @@ long uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
256
275
257
276
ds = d ;
258
277
259
- for (si = s ; si < se ; /* empty */ ) {
278
+ si = s ;
279
+ while (si < se ) {
260
280
st = si ;
261
281
c = uv__utf8_decode1 (& si , se );
262
282
283
+ if (c == -1u )
284
+ return UV_EINVAL ;
285
+
263
286
if (c != '.' )
264
287
if (c != 0x3002 ) /* 。 */
265
288
if (c != 0xFF0E ) /* ． */
0 commit comments