@@ -135,6 +135,7 @@ class Demangler {
135
135
void printDecimalNumber (uint64_t N);
136
136
void printBasicType (BasicType);
137
137
void printLifetime (uint64_t Index);
138
+ void printIdentifier (Identifier Ident);
138
139
139
140
char look () const ;
140
141
char consume ();
@@ -283,8 +284,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
283
284
switch (consume ()) {
284
285
case ' C' : {
285
286
parseOptionalBase62Number (' s' );
286
- Identifier Ident = parseIdentifier ();
287
- print (Ident.Name );
287
+ printIdentifier (parseIdentifier ());
288
288
break ;
289
289
}
290
290
case ' M' : {
@@ -333,7 +333,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
333
333
print (NS);
334
334
if (!Ident.empty ()) {
335
335
print (" :" );
336
- print (Ident. Name );
336
+ printIdentifier (Ident);
337
337
}
338
338
print (' #' );
339
339
printDecimalNumber (Disambiguator);
@@ -342,7 +342,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
342
342
// Implementation internal namespaces.
343
343
if (!Ident.empty ()) {
344
344
print (" ::" );
345
- print (Ident. Name );
345
+ printIdentifier (Ident);
346
346
}
347
347
}
348
348
break ;
@@ -669,6 +669,8 @@ void Demangler::demangleFnSig() {
669
669
print (" C" );
670
670
} else {
671
671
Identifier Ident = parseIdentifier ();
672
+ if (Ident.Punycode )
673
+ Error = true ;
672
674
for (char C : Ident.Name ) {
673
675
// When mangling ABI string, the "-" is replaced with "_".
674
676
if (C == ' _' )
@@ -1078,6 +1080,172 @@ void Demangler::printLifetime(uint64_t Index) {
1078
1080
}
1079
1081
}
1080
1082
1083
+ static inline bool decodePunycodeDigit (char C, size_t &Value) {
1084
+ if (isLower (C)) {
1085
+ Value = C - ' a' ;
1086
+ return true ;
1087
+ }
1088
+
1089
+ if (isDigit (C)) {
1090
+ Value = 26 + (C - ' 0' );
1091
+ return true ;
1092
+ }
1093
+
1094
+ return false ;
1095
+ }
1096
+
1097
+ static void removeNullBytes (OutputStream &Output, size_t StartIdx) {
1098
+ char *Buffer = Output.getBuffer ();
1099
+ char *Start = Buffer + StartIdx;
1100
+ char *End = Buffer + Output.getCurrentPosition ();
1101
+ Output.setCurrentPosition (std::remove (Start, End, ' \0 ' ) - Buffer);
1102
+ }
1103
+
1104
// Encodes CodePoint as UTF-8 and stores the result in Output.
//
// Writes between one and four bytes starting at Output[0]; bytes past the
// encoded length are left untouched, so callers that want zero padding must
// pre-initialize the buffer. Output must have room for at least four bytes.
//
// Returns false, without writing anything, if CodePoint is not a valid
// unicode scalar value (a surrogate in 0xD800..0xDFFF, or above 0x10FFFF).
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  // Surrogates are not scalar values and must not be encoded.
  if (0xD800 <= CodePoint && CodePoint <= 0xDFFF)
    return false;

  // One byte: 0xxxxxxx.
  if (CodePoint <= 0x7F) {
    Output[0] = static_cast<char>(CodePoint);
    return true;
  }

  // Two bytes: 110xxxxx 10xxxxxx.
  if (CodePoint <= 0x7FF) {
    Output[0] = static_cast<char>(0xC0 | ((CodePoint >> 6) & 0x3F));
    Output[1] = static_cast<char>(0x80 | (CodePoint & 0x3F));
    return true;
  }

  // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx.
  if (CodePoint <= 0xFFFF) {
    Output[0] = static_cast<char>(0xE0 | (CodePoint >> 12));
    Output[1] = static_cast<char>(0x80 | ((CodePoint >> 6) & 0x3F));
    Output[2] = static_cast<char>(0x80 | (CodePoint & 0x3F));
    return true;
  }

  // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx.
  if (CodePoint <= 0x10FFFF) {
    Output[0] = static_cast<char>(0xF0 | (CodePoint >> 18));
    Output[1] = static_cast<char>(0x80 | ((CodePoint >> 12) & 0x3F));
    Output[2] = static_cast<char>(0x80 | ((CodePoint >> 6) & 0x3F));
    Output[3] = static_cast<char>(0x80 | (CodePoint & 0x3F));
    return true;
  }

  return false;
}
1138
+
1139
+ // Decodes string encoded using punycode and appends results to Output.
1140
+ // Returns true if decoding was successful.
1141
+ static bool decodePunycode (StringView Input, OutputStream &Output) {
1142
+ size_t OutputSize = Output.getCurrentPosition ();
1143
+ size_t InputIdx = 0 ;
1144
+
1145
+ // Rust uses an underscore as a delimiter.
1146
+ size_t DelimiterPos = StringView::npos;
1147
+ for (size_t I = 0 ; I != Input.size (); ++I)
1148
+ if (Input[I] == ' _' )
1149
+ DelimiterPos = I;
1150
+
1151
+ if (DelimiterPos != StringView::npos) {
1152
+ // Copy basic code points before the last delimiter to the output.
1153
+ for (; InputIdx != DelimiterPos; ++InputIdx) {
1154
+ char C = Input[InputIdx];
1155
+ if (!isValid (C))
1156
+ return false ;
1157
+ // Code points are padded with zeros while decoding is in progress.
1158
+ char UTF8[4 ] = {C};
1159
+ Output += StringView (UTF8, UTF8 + 4 );
1160
+ }
1161
+ // Skip over the delimiter.
1162
+ ++InputIdx;
1163
+ }
1164
+
1165
+ size_t Base = 36 ;
1166
+ size_t Skew = 38 ;
1167
+ size_t Bias = 72 ;
1168
+ size_t N = 0x80 ;
1169
+ size_t TMin = 1 ;
1170
+ size_t TMax = 26 ;
1171
+ size_t Damp = 700 ;
1172
+
1173
+ auto Adapt = [&](size_t Delta, size_t NumPoints) {
1174
+ Delta /= Damp;
1175
+ Delta += Delta / NumPoints;
1176
+ Damp = 2 ;
1177
+
1178
+ size_t K = 0 ;
1179
+ while (Delta > (Base - TMin) * TMax / 2 ) {
1180
+ Delta /= Base - TMin;
1181
+ K += Base;
1182
+ }
1183
+ return K + (((Base - TMin + 1 ) * Delta) / (Delta + Skew));
1184
+ };
1185
+
1186
+ // Main decoding loop.
1187
+ for (size_t I = 0 ; InputIdx != Input.size (); I += 1 ) {
1188
+ size_t OldI = I;
1189
+ size_t W = 1 ;
1190
+ size_t Max = std::numeric_limits<size_t >::max ();
1191
+ for (size_t K = Base; true ; K += Base) {
1192
+ if (InputIdx == Input.size ())
1193
+ return false ;
1194
+ char C = Input[InputIdx++];
1195
+ size_t Digit = 0 ;
1196
+ if (!decodePunycodeDigit (C, Digit))
1197
+ return false ;
1198
+
1199
+ if (Digit > (Max - I) / W)
1200
+ return false ;
1201
+ I += Digit * W;
1202
+
1203
+ size_t T;
1204
+ if (K <= Bias)
1205
+ T = TMin;
1206
+ else if (K >= Bias + TMax)
1207
+ T = TMax;
1208
+ else
1209
+ T = K - Bias;
1210
+
1211
+ if (Digit < T)
1212
+ break ;
1213
+
1214
+ if (W > Max / (Base - T))
1215
+ return false ;
1216
+ W *= (Base - T);
1217
+ }
1218
+ size_t NumPoints = (Output.getCurrentPosition () - OutputSize) / 4 + 1 ;
1219
+ Bias = Adapt (I - OldI, NumPoints);
1220
+
1221
+ if (I / NumPoints > Max - N)
1222
+ return false ;
1223
+ N += I / NumPoints;
1224
+ I = I % NumPoints;
1225
+
1226
+ // Insert N at position I in the output.
1227
+ char UTF8[4 ] = {};
1228
+ if (!encodeUTF8 (N, UTF8))
1229
+ return false ;
1230
+ Output.insert (OutputSize + I * 4 , UTF8, 4 );
1231
+ }
1232
+
1233
+ removeNullBytes (Output, OutputSize);
1234
+ return true ;
1235
+ }
1236
+
1237
+ void Demangler::printIdentifier (Identifier Ident) {
1238
+ if (Error || !Print)
1239
+ return ;
1240
+
1241
+ if (Ident.Punycode ) {
1242
+ if (!decodePunycode (Ident.Name , Output))
1243
+ Error = true ;
1244
+ } else {
1245
+ print (Ident.Name );
1246
+ }
1247
+ }
1248
+
1081
1249
char Demangler::look () const {
1082
1250
if (Error || Position >= Input.size ())
1083
1251
return 0 ;
0 commit comments