@@ -50,7 +50,7 @@ extern void *memset(void *s, int c, size_t n);
50
50
#include "rust-demangle.h"
51
51
52
52
53
- /* Mangled Rust symbols look like this:
53
+ /* Mangled (legacy) Rust symbols look like this:
54
54
_$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
55
55
56
56
The original symbol is:
@@ -74,16 +74,7 @@ extern void *memset(void *s, int c, size_t n);
74
74
">" => $GT$
75
75
"(" => $LP$
76
76
")" => $RP$
77
- " " => $u20$
78
- "\"" => $u22$
79
- "'" => $u27$
80
- "+" => $u2b$
81
- ";" => $u3b$
82
- "[" => $u5b$
83
- "]" => $u5d$
84
- "{" => $u7b$
85
- "}" => $u7d$
86
- "~" => $u7e$
77
+ "\u{XY}" => $uXY$
87
78
88
79
A double ".." means "::" and a single "." means "-".
89
80
@@ -95,7 +86,8 @@ static const size_t hash_len = 16;
95
86
96
87
static int is_prefixed_hash (const char * start );
97
88
static int looks_like_rust (const char * sym , size_t len );
98
- static int unescape (const char * * in , char * * out , const char * seq , char value );
89
+ static int parse_lower_hex_nibble (char nibble );
90
+ static char parse_legacy_escape (const char * * in );
99
91
100
92
/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
101
93
@@ -149,20 +141,20 @@ is_prefixed_hash (const char *str)
149
141
const char * end ;
150
142
char seen [16 ];
151
143
size_t i ;
152
- int count ;
144
+ int count , nibble ;
153
145
154
146
if (strncmp (str , hash_prefix , hash_prefix_len ))
155
147
return 0 ;
156
148
str += hash_prefix_len ;
157
149
158
150
memset (seen , 0 , sizeof (seen ));
159
151
for (end = str + hash_len ; str < end ; str ++ )
160
- if ( * str >= '0' && * str <= '9' )
161
- seen [ * str - '0' ] = 1 ;
162
- else if (* str >= 'a' && * str <= 'f' )
163
- seen [ * str - 'a' + 10 ] = 1 ;
164
- else
165
- return 0 ;
152
+ {
153
+ nibble = parse_lower_hex_nibble ( * str ) ;
154
+ if (nibble < 0 )
155
+ return 0 ;
156
+ seen [ nibble ] = 1 ;
157
+ }
166
158
167
159
/* Count how many distinct digits seen */
168
160
count = 0 ;
@@ -179,57 +171,17 @@ looks_like_rust (const char *str, size_t len)
179
171
const char * end = str + len ;
180
172
181
173
while (str < end )
182
- switch (* str )
183
- {
184
- case '$' :
185
- if (!strncmp (str , "$C$" , 3 ))
186
- str += 3 ;
187
- else if (!strncmp (str , "$SP$" , 4 )
188
- || !strncmp (str , "$BP$" , 4 )
189
- || !strncmp (str , "$RF$" , 4 )
190
- || !strncmp (str , "$LT$" , 4 )
191
- || !strncmp (str , "$GT$" , 4 )
192
- || !strncmp (str , "$LP$" , 4 )
193
- || !strncmp (str , "$RP$" , 4 ))
194
- str += 4 ;
195
- else if (!strncmp (str , "$u20$" , 5 )
196
- || !strncmp (str , "$u22$" , 5 )
197
- || !strncmp (str , "$u27$" , 5 )
198
- || !strncmp (str , "$u2b$" , 5 )
199
- || !strncmp (str , "$u3b$" , 5 )
200
- || !strncmp (str , "$u5b$" , 5 )
201
- || !strncmp (str , "$u5d$" , 5 )
202
- || !strncmp (str , "$u7b$" , 5 )
203
- || !strncmp (str , "$u7d$" , 5 )
204
- || !strncmp (str , "$u7e$" , 5 ))
205
- str += 5 ;
206
- else
207
- return 0 ;
208
- break ;
209
- case '.' :
210
- /* Do not allow three or more consecutive dots */
211
- if (!strncmp (str , "..." , 3 ))
212
- return 0 ;
213
- /* Fall through */
214
- case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
215
- case 'g' : case 'h' : case 'i' : case 'j' : case 'k' : case 'l' :
216
- case 'm' : case 'n' : case 'o' : case 'p' : case 'q' : case 'r' :
217
- case 's' : case 't' : case 'u' : case 'v' : case 'w' : case 'x' :
218
- case 'y' : case 'z' :
219
- case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
220
- case 'G' : case 'H' : case 'I' : case 'J' : case 'K' : case 'L' :
221
- case 'M' : case 'N' : case 'O' : case 'P' : case 'Q' : case 'R' :
222
- case 'S' : case 'T' : case 'U' : case 'V' : case 'W' : case 'X' :
223
- case 'Y' : case 'Z' :
224
- case '0' : case '1' : case '2' : case '3' : case '4' : case '5' :
225
- case '6' : case '7' : case '8' : case '9' :
226
- case '_' :
227
- case ':' :
228
- str ++ ;
229
- break ;
230
- default :
231
- return 0 ;
232
- }
174
+ {
175
+ if (* str == '$' )
176
+ {
177
+ if (!parse_legacy_escape (& str ))
178
+ return 0 ;
179
+ }
180
+ else if (* str == '.' || * str == '_' || * str == ':' || ISALNUM (* str ))
181
+ str ++ ;
182
+ else
183
+ return 0 ;
184
+ }
233
185
234
186
return 1 ;
235
187
}
@@ -246,6 +198,7 @@ rust_demangle_sym (char *sym)
246
198
const char * in ;
247
199
char * out ;
248
200
const char * end ;
201
+ char unescaped ;
249
202
250
203
if (!sym )
251
204
return ;
@@ -255,75 +208,49 @@ rust_demangle_sym (char *sym)
255
208
end = sym + strlen (sym ) - (hash_prefix_len + hash_len );
256
209
257
210
while (in < end )
258
- switch (* in )
259
- {
260
- case '$' :
261
- if (!(unescape (& in , & out , "$C$" , ',' )
262
- || unescape (& in , & out , "$SP$" , '@' )
263
- || unescape (& in , & out , "$BP$" , '*' )
264
- || unescape (& in , & out , "$RF$" , '&' )
265
- || unescape (& in , & out , "$LT$" , '<' )
266
- || unescape (& in , & out , "$GT$" , '>' )
267
- || unescape (& in , & out , "$LP$" , '(' )
268
- || unescape (& in , & out , "$RP$" , ')' )
269
- || unescape (& in , & out , "$u20$" , ' ' )
270
- || unescape (& in , & out , "$u22$" , '\"' )
271
- || unescape (& in , & out , "$u27$" , '\'' )
272
- || unescape (& in , & out , "$u2b$" , '+' )
273
- || unescape (& in , & out , "$u3b$" , ';' )
274
- || unescape (& in , & out , "$u5b$" , '[' )
275
- || unescape (& in , & out , "$u5d$" , ']' )
276
- || unescape (& in , & out , "$u7b$" , '{' )
277
- || unescape (& in , & out , "$u7d$" , '}' )
278
- || unescape (& in , & out , "$u7e$" , '~' ))) {
279
- /* unexpected escape sequence, not looks_like_rust. */
280
- goto fail ;
281
- }
282
- break ;
283
- case '_' :
284
- /* If this is the start of a path component and the next
285
- character is an escape sequence, ignore the underscore. The
286
- mangler inserts an underscore to make sure the path
287
- component begins with a XID_Start character. */
288
- if ((in == sym || in [-1 ] == ':' ) && in [1 ] == '$' )
289
- in ++ ;
290
- else
291
- * out ++ = * in ++ ;
292
- break ;
293
- case '.' :
294
- if (in [1 ] == '.' )
295
- {
296
- /* ".." becomes "::" */
297
- * out ++ = ':' ;
298
- * out ++ = ':' ;
299
- in += 2 ;
300
- }
301
- else
302
- {
303
- /* "." becomes "-" */
304
- * out ++ = '-' ;
305
- in ++ ;
306
- }
307
- break ;
308
- case 'a' : case 'b' : case 'c' : case 'd' : case 'e' : case 'f' :
309
- case 'g' : case 'h' : case 'i' : case 'j' : case 'k' : case 'l' :
310
- case 'm' : case 'n' : case 'o' : case 'p' : case 'q' : case 'r' :
311
- case 's' : case 't' : case 'u' : case 'v' : case 'w' : case 'x' :
312
- case 'y' : case 'z' :
313
- case 'A' : case 'B' : case 'C' : case 'D' : case 'E' : case 'F' :
314
- case 'G' : case 'H' : case 'I' : case 'J' : case 'K' : case 'L' :
315
- case 'M' : case 'N' : case 'O' : case 'P' : case 'Q' : case 'R' :
316
- case 'S' : case 'T' : case 'U' : case 'V' : case 'W' : case 'X' :
317
- case 'Y' : case 'Z' :
318
- case '0' : case '1' : case '2' : case '3' : case '4' : case '5' :
319
- case '6' : case '7' : case '8' : case '9' :
320
- case ':' :
321
- * out ++ = * in ++ ;
322
- break ;
323
- default :
324
- /* unexpected character in symbol, not looks_like_rust. */
325
- goto fail ;
326
- }
211
+ {
212
+ if (* in == '$' )
213
+ {
214
+ unescaped = parse_legacy_escape (& in );
215
+ if (unescaped )
216
+ * out ++ = unescaped ;
217
+ else
218
+ /* unexpected escape sequence, not looks_like_rust. */
219
+ goto fail ;
220
+ }
221
+ else if (* in == '_' )
222
+ {
223
+ /* If this is the start of a path component and the next
224
+ character is an escape sequence, ignore the underscore. The
225
+ mangler inserts an underscore to make sure the path
226
+ component begins with a XID_Start character. */
227
+ if ((in == sym || in [-1 ] == ':' ) && in [1 ] == '$' )
228
+ in ++ ;
229
+ else
230
+ * out ++ = * in ++ ;
231
+ }
232
+ else if (* in == '.' )
233
+ {
234
+ if (in [1 ] == '.' )
235
+ {
236
+ /* ".." becomes "::" */
237
+ * out ++ = ':' ;
238
+ * out ++ = ':' ;
239
+ in += 2 ;
240
+ }
241
+ else
242
+ {
243
+ /* "." becomes "-" */
244
+ * out ++ = '-' ;
245
+ in ++ ;
246
+ }
247
+ }
248
+ else if (* in == ':' || ISALNUM (* in ))
249
+ * out ++ = * in ++ ;
250
+ else
251
+ /* unexpected character in symbol, not looks_like_rust. */
252
+ goto fail ;
253
+ }
327
254
goto done ;
328
255
329
256
fail :
@@ -332,18 +259,78 @@ rust_demangle_sym (char *sym)
332
259
* out = '\0' ;
333
260
}
334
261
262
/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise.
   Upper-case hex digits are rejected on purpose: callers use this to
   validate the lower-case hex that appears in legacy hashes and in
   "$uXY$" escapes.  */
static int
parse_lower_hex_nibble (char nibble)
{
  if ('0' <= nibble && nibble <= '9')
    return nibble - '0';
  if ('a' <= nibble && nibble <= 'f')
    return 0xa + (nibble - 'a');
  return -1;
}

/* Return the unescaped character for a "$...$" escape, or 0 if invalid.

   *IN must point into a NUL-terminated string.  On success *IN is
   advanced past the closing '$'; on failure *IN is left unchanged.

   Recognized escapes:
     "$C$"                        => ','
     "$SP$" "$BP$" "$RF$"         => '@' '*' '&'
     "$LT$" "$GT$" "$LP$" "$RP$"  => '<' '>' '(' ')'
     "$uXY$"                      => the ASCII character 0xXY, where X and
                                     Y are lower-case hex digits and the
                                     result is printable (0x20-0x7e).  */
static char
parse_legacy_escape (const char **in)
{
  /* Two-letter escapes and the character each one stands for.  */
  static const struct
  {
    char first;
    char second;
    char value;
  } two_letter[] = {
    { 'S', 'P', '@' },
    { 'B', 'P', '*' },
    { 'R', 'F', '&' },
    { 'L', 'T', '<' },
    { 'G', 'T', '>' },
    { 'L', 'P', '(' },
    { 'R', 'P', ')' }
  };

  char c = 0;
  const char *e;
  size_t escape_len = 0;
  size_t i;
  int lo_nibble = -1, hi_nibble = -1;

  if ((*in)[0] != '$')
    return 0;

  /* E points at the escape name, just past the opening '$'.  */
  e = *in + 1;

  if (e[0] == 'C')
    {
      escape_len = 1;
      c = ',';
    }
  else if (e[0] == 'u')
    {
      escape_len = 3;

      /* A NUL is not a hex digit, so a truncated escape is rejected
	 here before any byte past the terminator is examined.  */
      hi_nibble = parse_lower_hex_nibble (e[1]);
      if (hi_nibble < 0)
	return 0;
      lo_nibble = parse_lower_hex_nibble (e[2]);
      if (lo_nibble < 0)
	return 0;

      /* Only allow non-control ASCII characters: reject anything
	 above 0x7f, the C0 controls below 0x20, and DEL (0x7f).  */
      if (hi_nibble > 7)
	return 0;
      c = (hi_nibble << 4) | lo_nibble;
      if (c < 0x20 || c == 0x7f)
	return 0;
    }
  else
    {
      escape_len = 2;

      /* E[1] is only read once E[0] matched a (non-NUL) letter, so the
	 lookup never reads past the string terminator.  */
      for (i = 0; i < sizeof (two_letter) / sizeof (two_letter[0]); i++)
	if (e[0] == two_letter[i].first && e[1] == two_letter[i].second)
	  {
	    c = two_letter[i].value;
	    break;
	  }
    }

  /* C is non-zero only if every byte of the escape name matched, which
     makes E[ESCAPE_LEN] safe to read; it must be the closing '$'.  */
  if (!c || e[escape_len] != '$')
    return 0;

  /* Skip the opening '$', the escape name and the closing '$'.  */
  *in += 2 + escape_len;
  return c;
}
0 commit comments