Skip to content

Commit 42bf58b

Browse files
eddyb authored and Jeff Law committed
rust-demangle.c (unescape): Remove.
* rust-demangle.c (unescape): Remove.
(parse_lower_hex_nibble): New function.
(parse_legacy_escape): New function.
(is_prefixed_hash): Use parse_lower_hex_nibble.
(looks_like_rust): Use parse_legacy_escape.
(rust_demangle_sym): Use parse_legacy_escape.
* testsuite/rust-demangle-expected: Add 'llv$u6d$' test.

From-SVN: r275353
1 parent 5f76ab1 commit 42bf58b

File tree

3 files changed

+148
-147
lines changed

3 files changed

+148
-147
lines changed

libiberty/ChangeLog

+10
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
2019-09-03 Eduard-Mihai Burtescu <[email protected]>
2+
3+
* rust-demangle.c (unescape): Remove.
4+
(parse_lower_hex_nibble): New function.
5+
(parse_legacy_escape): New function.
6+
(is_prefixed_hash): Use parse_lower_hex_nibble.
7+
(looks_like_rust): Use parse_legacy_escape.
8+
(rust_demangle_sym): Use parse_legacy_escape.
9+
* testsuite/rust-demangle-expected: Add 'llv$u6d$' test.
10+
111
2019-08-27 Martin Liska <[email protected]>
212

313
PR lto/91478

libiberty/rust-demangle.c

+134-147
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ extern void *memset(void *s, int c, size_t n);
5050
#include "rust-demangle.h"
5151

5252

53-
/* Mangled Rust symbols look like this:
53+
/* Mangled (legacy) Rust symbols look like this:
5454
_$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
5555
5656
The original symbol is:
@@ -74,16 +74,7 @@ extern void *memset(void *s, int c, size_t n);
7474
">" => $GT$
7575
"(" => $LP$
7676
")" => $RP$
77-
" " => $u20$
78-
"\"" => $u22$
79-
"'" => $u27$
80-
"+" => $u2b$
81-
";" => $u3b$
82-
"[" => $u5b$
83-
"]" => $u5d$
84-
"{" => $u7b$
85-
"}" => $u7d$
86-
"~" => $u7e$
77+
"\u{XY}" => $uXY$
8778
8879
A double ".." means "::" and a single "." means "-".
8980
@@ -95,7 +86,8 @@ static const size_t hash_len = 16;
9586

9687
static int is_prefixed_hash (const char *start);
9788
static int looks_like_rust (const char *sym, size_t len);
98-
static int unescape (const char **in, char **out, const char *seq, char value);
89+
static int parse_lower_hex_nibble (char nibble);
90+
static char parse_legacy_escape (const char **in);
9991

10092
/* INPUT: sym: symbol that has been through C++ (gnu v3) demangling
10193
@@ -149,20 +141,20 @@ is_prefixed_hash (const char *str)
149141
const char *end;
150142
char seen[16];
151143
size_t i;
152-
int count;
144+
int count, nibble;
153145

154146
if (strncmp (str, hash_prefix, hash_prefix_len))
155147
return 0;
156148
str += hash_prefix_len;
157149

158150
memset (seen, 0, sizeof(seen));
159151
for (end = str + hash_len; str < end; str++)
160-
if (*str >= '0' && *str <= '9')
161-
seen[*str - '0'] = 1;
162-
else if (*str >= 'a' && *str <= 'f')
163-
seen[*str - 'a' + 10] = 1;
164-
else
165-
return 0;
152+
{
153+
nibble = parse_lower_hex_nibble (*str);
154+
if (nibble < 0)
155+
return 0;
156+
seen[nibble] = 1;
157+
}
166158

167159
/* Count how many distinct digits seen */
168160
count = 0;
@@ -179,57 +171,17 @@ looks_like_rust (const char *str, size_t len)
179171
const char *end = str + len;
180172

181173
while (str < end)
182-
switch (*str)
183-
{
184-
case '$':
185-
if (!strncmp (str, "$C$", 3))
186-
str += 3;
187-
else if (!strncmp (str, "$SP$", 4)
188-
|| !strncmp (str, "$BP$", 4)
189-
|| !strncmp (str, "$RF$", 4)
190-
|| !strncmp (str, "$LT$", 4)
191-
|| !strncmp (str, "$GT$", 4)
192-
|| !strncmp (str, "$LP$", 4)
193-
|| !strncmp (str, "$RP$", 4))
194-
str += 4;
195-
else if (!strncmp (str, "$u20$", 5)
196-
|| !strncmp (str, "$u22$", 5)
197-
|| !strncmp (str, "$u27$", 5)
198-
|| !strncmp (str, "$u2b$", 5)
199-
|| !strncmp (str, "$u3b$", 5)
200-
|| !strncmp (str, "$u5b$", 5)
201-
|| !strncmp (str, "$u5d$", 5)
202-
|| !strncmp (str, "$u7b$", 5)
203-
|| !strncmp (str, "$u7d$", 5)
204-
|| !strncmp (str, "$u7e$", 5))
205-
str += 5;
206-
else
207-
return 0;
208-
break;
209-
case '.':
210-
/* Do not allow three or more consecutive dots */
211-
if (!strncmp (str, "...", 3))
212-
return 0;
213-
/* Fall through */
214-
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
215-
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
216-
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
217-
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
218-
case 'y': case 'z':
219-
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
220-
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
221-
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
222-
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
223-
case 'Y': case 'Z':
224-
case '0': case '1': case '2': case '3': case '4': case '5':
225-
case '6': case '7': case '8': case '9':
226-
case '_':
227-
case ':':
228-
str++;
229-
break;
230-
default:
231-
return 0;
232-
}
174+
{
175+
if (*str == '$')
176+
{
177+
if (!parse_legacy_escape (&str))
178+
return 0;
179+
}
180+
else if (*str == '.' || *str == '_' || *str == ':' || ISALNUM (*str))
181+
str++;
182+
else
183+
return 0;
184+
}
233185

234186
return 1;
235187
}
@@ -246,6 +198,7 @@ rust_demangle_sym (char *sym)
246198
const char *in;
247199
char *out;
248200
const char *end;
201+
char unescaped;
249202

250203
if (!sym)
251204
return;
@@ -255,75 +208,49 @@ rust_demangle_sym (char *sym)
255208
end = sym + strlen (sym) - (hash_prefix_len + hash_len);
256209

257210
while (in < end)
258-
switch (*in)
259-
{
260-
case '$':
261-
if (!(unescape (&in, &out, "$C$", ',')
262-
|| unescape (&in, &out, "$SP$", '@')
263-
|| unescape (&in, &out, "$BP$", '*')
264-
|| unescape (&in, &out, "$RF$", '&')
265-
|| unescape (&in, &out, "$LT$", '<')
266-
|| unescape (&in, &out, "$GT$", '>')
267-
|| unescape (&in, &out, "$LP$", '(')
268-
|| unescape (&in, &out, "$RP$", ')')
269-
|| unescape (&in, &out, "$u20$", ' ')
270-
|| unescape (&in, &out, "$u22$", '\"')
271-
|| unescape (&in, &out, "$u27$", '\'')
272-
|| unescape (&in, &out, "$u2b$", '+')
273-
|| unescape (&in, &out, "$u3b$", ';')
274-
|| unescape (&in, &out, "$u5b$", '[')
275-
|| unescape (&in, &out, "$u5d$", ']')
276-
|| unescape (&in, &out, "$u7b$", '{')
277-
|| unescape (&in, &out, "$u7d$", '}')
278-
|| unescape (&in, &out, "$u7e$", '~'))) {
279-
/* unexpected escape sequence, not looks_like_rust. */
280-
goto fail;
281-
}
282-
break;
283-
case '_':
284-
/* If this is the start of a path component and the next
285-
character is an escape sequence, ignore the underscore. The
286-
mangler inserts an underscore to make sure the path
287-
component begins with a XID_Start character. */
288-
if ((in == sym || in[-1] == ':') && in[1] == '$')
289-
in++;
290-
else
291-
*out++ = *in++;
292-
break;
293-
case '.':
294-
if (in[1] == '.')
295-
{
296-
/* ".." becomes "::" */
297-
*out++ = ':';
298-
*out++ = ':';
299-
in += 2;
300-
}
301-
else
302-
{
303-
/* "." becomes "-" */
304-
*out++ = '-';
305-
in++;
306-
}
307-
break;
308-
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
309-
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
310-
case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
311-
case 's': case 't': case 'u': case 'v': case 'w': case 'x':
312-
case 'y': case 'z':
313-
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
314-
case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
315-
case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
316-
case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
317-
case 'Y': case 'Z':
318-
case '0': case '1': case '2': case '3': case '4': case '5':
319-
case '6': case '7': case '8': case '9':
320-
case ':':
321-
*out++ = *in++;
322-
break;
323-
default:
324-
/* unexpected character in symbol, not looks_like_rust. */
325-
goto fail;
326-
}
211+
{
212+
if (*in == '$')
213+
{
214+
unescaped = parse_legacy_escape (&in);
215+
if (unescaped)
216+
*out++ = unescaped;
217+
else
218+
/* unexpected escape sequence, not looks_like_rust. */
219+
goto fail;
220+
}
221+
else if (*in == '_')
222+
{
223+
/* If this is the start of a path component and the next
224+
character is an escape sequence, ignore the underscore. The
225+
mangler inserts an underscore to make sure the path
226+
component begins with a XID_Start character. */
227+
if ((in == sym || in[-1] == ':') && in[1] == '$')
228+
in++;
229+
else
230+
*out++ = *in++;
231+
}
232+
else if (*in == '.')
233+
{
234+
if (in[1] == '.')
235+
{
236+
/* ".." becomes "::" */
237+
*out++ = ':';
238+
*out++ = ':';
239+
in += 2;
240+
}
241+
else
242+
{
243+
/* "." becomes "-" */
244+
*out++ = '-';
245+
in++;
246+
}
247+
}
248+
else if (*in == ':' || ISALNUM (*in))
249+
*out++ = *in++;
250+
else
251+
/* unexpected character in symbol, not looks_like_rust. */
252+
goto fail;
253+
}
327254
goto done;
328255

329256
fail:
@@ -332,18 +259,78 @@ rust_demangle_sym (char *sym)
332259
*out = '\0';
333260
}
334261

262+
/* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
335263
static int
336-
unescape (const char **in, char **out, const char *seq, char value)
264+
parse_lower_hex_nibble (char nibble)
337265
{
338-
size_t len = strlen (seq);
266+
if ('0' <= nibble && nibble <= '9')
267+
return nibble - '0';
268+
if ('a' <= nibble && nibble <= 'f')
269+
return 0xa + (nibble - 'a');
270+
return -1;
271+
}
339272

340-
if (strncmp (*in, seq, len))
341-
return 0;
273+
/* Return the unescaped character for a "$...$" escape, or 0 if invalid. */
274+
static char
275+
parse_legacy_escape (const char **in)
276+
{
277+
char c = 0;
278+
const char *e;
279+
size_t escape_len = 0;
280+
int lo_nibble = -1, hi_nibble = -1;
342281

343-
**out = value;
282+
if ((*in)[0] != '$')
283+
return 0;
344284

345-
*in += len;
346-
*out += 1;
285+
e = *in + 1;
286+
287+
if (e[0] == 'C')
288+
{
289+
escape_len = 1;
290+
291+
c = ',';
292+
}
293+
else
294+
{
295+
escape_len = 2;
296+
297+
if (e[0] == 'S' && e[1] == 'P')
298+
c = '@';
299+
else if (e[0] == 'B' && e[1] == 'P')
300+
c = '*';
301+
else if (e[0] == 'R' && e[1] == 'F')
302+
c = '&';
303+
else if (e[0] == 'L' && e[1] == 'T')
304+
c = '<';
305+
else if (e[0] == 'G' && e[1] == 'T')
306+
c = '>';
307+
else if (e[0] == 'L' && e[1] == 'P')
308+
c = '(';
309+
else if (e[0] == 'R' && e[1] == 'P')
310+
c = ')';
311+
else if (e[0] == 'u')
312+
{
313+
escape_len = 3;
314+
315+
hi_nibble = parse_lower_hex_nibble (e[1]);
316+
if (hi_nibble < 0)
317+
return 0;
318+
lo_nibble = parse_lower_hex_nibble (e[2]);
319+
if (lo_nibble < 0)
320+
return 0;
321+
322+
/* Only allow non-control ASCII characters. */
323+
if (hi_nibble > 7)
324+
return 0;
325+
c = (hi_nibble << 4) | lo_nibble;
326+
if (c < 0x20)
327+
return 0;
328+
}
329+
}
330+
331+
if (!c || e[escape_len] != '$')
332+
return 0;
347333

348-
return 1;
334+
*in += 2 + escape_len;
335+
return c;
349336
}

libiberty/testsuite/rust-demangle-expected

+4
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,7 @@ _ZN68_$LT$core..nonzero..NonZero$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref1
159159
--format=rust
160160
_ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f2ad4920655e85E
161161
<core::ptr::Unique<T> as core::ops::Deref>::deref
162+
#
163+
--format=rust
164+
_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE
165+
issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo

0 commit comments

Comments
 (0)