@@ -351,16 +351,19 @@ void Lexer::skipToEndOfLine(bool EatNewline) {
351
351
}
352
352
break ; // Otherwise, eat other characters.
353
353
case 0 :
354
- // If this is a random nul character in the middle of a buffer, skip it as
355
- // whitespace.
356
- if (CurPtr-1 != BufferEnd) {
354
+ switch (getNulCharacterKind (CurPtr - 1 )) {
355
+ case NulCharacterKind::Embedded:
356
+ // If this is a random nul character in the middle of a buffer, skip it
357
+ // as whitespace.
357
358
diagnoseEmbeddedNul (Diags, CurPtr-1 );
358
- break ;
359
+ LLVM_FALLTHROUGH;
360
+ case NulCharacterKind::CodeCompletion:
361
+ continue ;
362
+ case NulCharacterKind::BufferEnd:
363
+ // Otherwise, the last line of the file does not have a newline.
364
+ --CurPtr;
365
+ return ;
359
366
}
360
-
361
- // Otherwise, the last line of the file does not have a newline.
362
- --CurPtr;
363
- return ;
364
367
}
365
368
}
366
369
}
@@ -422,26 +425,30 @@ void Lexer::skipSlashStarComment() {
422
425
423
426
break ; // Otherwise, eat other characters.
424
427
case 0 :
425
- // If this is a random nul character in the middle of a buffer, skip it as
426
- // whitespace.
427
- if (CurPtr-1 != BufferEnd) {
428
- diagnoseEmbeddedNul (Diags, CurPtr-1 );
429
- break ;
430
- }
431
-
432
- // Otherwise, we have an unterminated /* comment.
433
- --CurPtr;
428
+ switch (getNulCharacterKind (CurPtr - 1 )) {
429
+ case NulCharacterKind::Embedded:
430
+ // If this is a random nul character in the middle of a buffer, skip it
431
+ // as whitespace.
432
+ diagnoseEmbeddedNul (Diags, CurPtr - 1 );
433
+ LLVM_FALLTHROUGH;
434
+ case NulCharacterKind::CodeCompletion:
435
+ continue ;
436
+ case NulCharacterKind::BufferEnd: {
437
+ // Otherwise, we have an unterminated /* comment.
438
+ --CurPtr;
434
439
435
- // Count how many levels deep we are.
436
- llvm::SmallString<8 > Terminator (" */" );
437
- while (--Depth != 0 )
438
- Terminator += " */" ;
440
+ // Count how many levels deep we are.
441
+ llvm::SmallString<8 > Terminator (" */" );
442
+ while (--Depth != 0 )
443
+ Terminator += " */" ;
439
444
440
- const char *EOL = (CurPtr[-1 ] == ' \n ' ) ? (CurPtr - 1 ) : CurPtr;
441
- diagnose (EOL, diag::lex_unterminated_block_comment)
442
- .fixItInsert (getSourceLoc (EOL), Terminator);
443
- diagnose (StartPtr, diag::lex_comment_start);
444
- return ;
445
+ const char *EOL = (CurPtr[-1 ] == ' \n ' ) ? (CurPtr - 1 ) : CurPtr;
446
+ diagnose (EOL, diag::lex_unterminated_block_comment)
447
+ .fixItInsert (getSourceLoc (EOL), Terminator);
448
+ diagnose (StartPtr, diag::lex_comment_start);
449
+ return ;
450
+ }
451
+ }
445
452
}
446
453
}
447
454
}
@@ -1857,6 +1864,16 @@ bool Lexer::tryLexConflictMarker(bool EatNewline) {
1857
1864
return false ;
1858
1865
}
1859
1866
1867
+ Lexer::NulCharacterKind Lexer::getNulCharacterKind (const char *Ptr ) const {
1868
+ assert (Ptr != nullptr && *Ptr == 0 );
1869
+ if (Ptr == CodeCompletionPtr) {
1870
+ return NulCharacterKind::CodeCompletion;
1871
+ }
1872
+ if (Ptr == BufferEnd) {
1873
+ return NulCharacterKind::BufferEnd;
1874
+ }
1875
+ return NulCharacterKind::Embedded;
1876
+ }
1860
1877
1861
1878
void Lexer::tryLexEditorPlaceholder () {
1862
1879
assert (CurPtr[-1 ] == ' <' && CurPtr[0 ] == ' #' );
@@ -2164,22 +2181,23 @@ void Lexer::lexImpl() {
2164
2181
return formToken (tok::unknown, TokStart);
2165
2182
2166
2183
case 0 :
2167
- if (CurPtr-1 == CodeCompletionPtr)
2184
+ switch (getNulCharacterKind (CurPtr - 1 )) {
2185
+ case NulCharacterKind::CodeCompletion:
2168
2186
return formToken (tok::code_complete, TokStart);
2169
2187
2170
- // If this is a random nul character in the middle of a buffer, skip it as
2171
- // whitespace.
2172
- if (CurPtr- 1 != BufferEnd) {
2188
+ case NulCharacterKind::Embedded:
2189
+ // If this is a random nul character in the middle of a buffer, skip it as
2190
+ // whitespace.
2173
2191
diagnoseEmbeddedNul (Diags, CurPtr-1 );
2174
2192
goto Restart;
2193
+ case NulCharacterKind::BufferEnd:
2194
+ // Otherwise, this is the real end of the buffer. Put CurPtr back into
2195
+ // buffer bounds.
2196
+ --CurPtr;
2197
+ // Return EOF.
2198
+ return formToken (tok::eof, TokStart);
2175
2199
}
2176
2200
2177
- // Otherwise, this is the real end of the buffer. Put CurPtr back into
2178
- // buffer bounds.
2179
- --CurPtr;
2180
- // Return EOF.
2181
- return formToken (tok::eof, TokStart);
2182
-
2183
2201
case ' @' : return formToken (tok::at_sign, TokStart);
2184
2202
case ' {' : return formToken (tok::l_brace, TokStart);
2185
2203
case ' [' : {
@@ -2323,7 +2341,6 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
2323
2341
Restart:
2324
2342
const char *TriviaStart = CurPtr;
2325
2343
2326
- // TODO: Handle random nul('\0') character in the middle of a buffer.
2327
2344
// TODO: Handle invalid UTF8 sequence which is skipped in lexImpl().
2328
2345
switch (*CurPtr++) {
2329
2346
case ' \n ' :
@@ -2403,6 +2420,19 @@ void Lexer::lexTrivia(syntax::Trivia &Pieces, bool IsForTrailingTrivia) {
2403
2420
goto Restart;
2404
2421
}
2405
2422
break ;
2423
+ case 0 :
2424
+ switch (getNulCharacterKind (CurPtr - 1 )) {
2425
+ case NulCharacterKind::Embedded: {
2426
+ diagnoseEmbeddedNul (Diags, CurPtr - 1 );
2427
+ size_t Length = CurPtr - TriviaStart;
2428
+ Pieces.push_back (TriviaPiece::garbageText ({TriviaStart, Length}));
2429
+ goto Restart;
2430
+ }
2431
+ case NulCharacterKind::CodeCompletion:
2432
+ case NulCharacterKind::BufferEnd:
2433
+ break ;
2434
+ }
2435
+ break ;
2406
2436
default :
2407
2437
break ;
2408
2438
}
0 commit comments