Skip to content

Commit fb351b3

Browse files
authoredMay 22, 2024··
Merge pull request #508 from PHPCSStandards/feature/tokenizer-php-harden-the-dnf-layer-some-more
Tokenizer/PHP: add extra hardening to the (DNF) type handling + efficiency improvement
2 parents 027c0cb + 83afad8 commit fb351b3

File tree

5 files changed

+379
-10
lines changed

5 files changed

+379
-10
lines changed
 

‎src/Tokenizers/PHP.php

+27-10
Original file line numberDiff line numberDiff line change
@@ -2602,7 +2602,9 @@ protected function processAdditional()
26022602

26032603
$this->createAttributesNestingMap();
26042604

2605-
$numTokens = count($this->tokens);
2605+
$numTokens = count($this->tokens);
2606+
$lastSeenTypeToken = $numTokens;
2607+
26062608
for ($i = ($numTokens - 1); $i >= 0; $i--) {
26072609
// Check for any unset scope conditions due to alternate IF/ENDIF syntax.
26082610
if (isset($this->tokens[$i]['scope_opener']) === true
@@ -3036,9 +3038,14 @@ protected function processAdditional()
30363038
continue;
30373039
} else if ($this->tokens[$i]['code'] === T_BITWISE_OR
30383040
|| $this->tokens[$i]['code'] === T_BITWISE_AND
3039-
|| $this->tokens[$i]['code'] === T_OPEN_PARENTHESIS
30403041
|| $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
30413042
) {
3043+
if ($lastSeenTypeToken < $i) {
3044+
// We've already examined this code to check if it is a type declaration and concluded it wasn't.
3045+
// No need to do it again.
3046+
continue;
3047+
}
3048+
30423049
/*
30433050
Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR.
30443051
Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND.
@@ -3133,9 +3140,14 @@ protected function processAdditional()
31333140

31343141
$typeTokenCountBefore = 0;
31353142
$typeOperators = [$i];
3143+
$parenthesesCount = 0;
31363144
$confirmed = false;
31373145
$maybeNullable = null;
31383146

3147+
if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
3148+
++$parenthesesCount;
3149+
}
3150+
31393151
for ($x = ($i - 1); $x >= 0; $x--) {
31403152
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
31413153
continue;
@@ -3167,7 +3179,7 @@ protected function processAdditional()
31673179
$confirmed = true;
31683180
break;
31693181
} else {
3170-
// This may still be an arrow function which hasn't be handled yet.
3182+
// This may still be an arrow function which hasn't been handled yet.
31713183
for ($y = ($x - 1); $y > 0; $y--) {
31723184
if (isset(Tokens::$emptyTokens[$this->tokens[$y]['code']]) === false
31733185
&& $this->tokens[$y]['code'] !== T_BITWISE_AND
@@ -3202,11 +3214,13 @@ protected function processAdditional()
32023214
continue;
32033215
}
32043216

3205-
if ($this->tokens[$x]['code'] === T_BITWISE_OR
3206-
|| $this->tokens[$x]['code'] === T_BITWISE_AND
3207-
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
3208-
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
3209-
) {
3217+
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
3218+
$typeOperators[] = $x;
3219+
continue;
3220+
}
3221+
3222+
if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
3223+
++$parenthesesCount;
32103224
$typeOperators[] = $x;
32113225
continue;
32123226
}
@@ -3244,6 +3258,9 @@ protected function processAdditional()
32443258
break;
32453259
}//end for
32463260

3261+
// Remember the last token we examined as part of the (non-)"type declaration".
3262+
$lastSeenTypeToken = $x;
3263+
32473264
if ($confirmed === false
32483265
&& $suspectedType === 'property or parameter'
32493266
&& isset($this->tokens[$i]['nested_parenthesis']) === true
@@ -3288,8 +3305,8 @@ protected function processAdditional()
32883305
unset($parens, $last);
32893306
}//end if
32903307

3291-
if ($confirmed === false) {
3292-
// Not a union or intersection type after all, move on.
3308+
if ($confirmed === false || ($parenthesesCount % 2) !== 0) {
3309+
// Not a (valid) union, intersection or DNF type after all, move on.
32933310
continue;
32943311
}
32953312

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
4+
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
5+
class ParseErrors {
6+
/* testBrokenConstDNFTypeEndOnOpenParenthesis */
7+
const A|(B PARSE_ERROR = null;
8+
9+
/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */
10+
public A|(B $parseError;
11+
12+
function unmatchedParens {
13+
/* testBrokenParamDNFTypeEndOnOpenParenthesis */
14+
A|(B $parseError,
15+
/* testBrokenReturnDNFTypeEndOnOpenParenthesis */
16+
) : A|(B {}
17+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
<?php
2+
/**
3+
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
4+
*
5+
* @author Juliette Reinders Folmer <phpcs_nospam@adviesenzo.nl>
6+
* @copyright 2024 PHPCSStandards and contributors
7+
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
8+
*/
9+
10+
namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;
11+
12+
use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
13+
14+
final class DNFTypesParseError1Test extends AbstractTokenizerTestCase
15+
{
16+
17+
18+
/**
19+
* Document handling for a DNF type / parse error where the last significant type specific token is an open parenthesis.
20+
*
21+
* @param string $testMarker The comment prefacing the target token.
22+
*
23+
* @dataProvider dataBrokenDNFTypeCantEndOnOpenParenthesis
24+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
25+
*
26+
* @return void
27+
*/
28+
public function testBrokenDNFTypeCantEndOnOpenParenthesis($testMarker)
29+
{
30+
$tokens = $this->phpcsFile->getTokens();
31+
32+
$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
33+
$token = $tokens[$openPtr];
34+
35+
// Verify that the open parenthesis is tokenized as a normal parenthesis.
36+
$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
37+
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');
38+
39+
// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
40+
// is not recognized as a valid type declaration.
41+
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
42+
$token = $tokens[$unionPtr];
43+
44+
$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
45+
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');
46+
47+
}//end testBrokenDNFTypeCantEndOnOpenParenthesis()
48+
49+
50+
/**
51+
* Data provider.
52+
*
53+
* @see testBrokenDNFTypeCantEndOnOpenParenthesis()
54+
*
55+
* @return array<string, array<string, string>>
56+
*/
57+
public static function dataBrokenDNFTypeCantEndOnOpenParenthesis()
58+
{
59+
return [
60+
'OO const type' => ['/* testBrokenConstDNFTypeEndOnOpenParenthesis */'],
61+
'OO property type' => ['/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */'],
62+
'Parameter type' => ['/* testBrokenParamDNFTypeEndOnOpenParenthesis */'],
63+
'Return type' => ['/* testBrokenReturnDNFTypeEndOnOpenParenthesis */'],
64+
];
65+
66+
}//end dataBrokenDNFTypeCantEndOnOpenParenthesis()
67+
68+
69+
}//end class
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
<?php
2+
3+
// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
4+
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
5+
//
6+
// Also note that the order of these tests is deliberate to try and trick the parentheses handling
7+
// in the Tokenizer class into matching parentheses pairs, even though the parentheses do
8+
// no belong together.
9+
10+
class UnmatchedParentheses {
11+
/* testBrokenConstDNFTypeParensMissingClose */
12+
const A|(B&C PARSE_ERROR_1 = null;
13+
14+
/* testBrokenConstDNFTypeParensMissingOpen */
15+
const A|B&C) PARSE_ERROR_2 = null;
16+
17+
/* testBrokenPropertyDNFTypeParensMissingClose */
18+
private A|(B&C $parseError1;
19+
20+
/* testBrokenPropertyDNFTypeParensMissingOpen */
21+
protected A|B&C) $parseError2;
22+
23+
function unmatchedParens1 (
24+
/* testBrokenParamDNFTypeParensMissingClose */
25+
A|(B&C $parseError,
26+
/* testBrokenReturnDNFTypeParensMissingOpen */
27+
) : A|B&C) {}
28+
29+
function unmatchedParens2 (
30+
/* testBrokenParamDNFTypeParensMissingOpen */
31+
A|B&C) $parseError
32+
/* testBrokenReturnDNFTypeParensMissingClose */
33+
) : A|(B&C {}
34+
}
35+
36+
class MatchedAndUnmatchedParentheses {
37+
/* testBrokenConstDNFTypeParensMissingOneClose */
38+
const (A&B)|(B&C PARSE_ERROR = null;
39+
40+
/* testBrokenPropertyDNFTypeParensMissingOneOpen */
41+
protected (A&B)|B&C) $parseError;
42+
43+
function unmatchedParens (
44+
/* testBrokenParamDNFTypeParensMissingOneClose */
45+
(A&B)|(B&C $parseError,
46+
/* testBrokenReturnDNFTypeParensMissingOneOpen */
47+
) : (A&B)|B&C) {}
48+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
<?php
2+
/**
3+
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
4+
*
5+
* @author Juliette Reinders Folmer <phpcs_nospam@adviesenzo.nl>
6+
* @copyright 2024 PHPCSStandards and contributors
7+
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
8+
*/
9+
10+
namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;
11+
12+
use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;
13+
use PHP_CodeSniffer\Util\Tokens;
14+
15+
final class DNFTypesParseError2Test extends AbstractTokenizerTestCase
16+
{
17+
18+
19+
/**
20+
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis.
21+
*
22+
* @param string $testMarker The comment prefacing the target token.
23+
*
24+
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens
25+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
26+
*
27+
* @return void
28+
*/
29+
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens($testMarker)
30+
{
31+
$tokens = $this->phpcsFile->getTokens();
32+
33+
// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
34+
// is not recognized as a valid type declaration.
35+
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
36+
$token = $tokens[$unionPtr];
37+
38+
$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
39+
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');
40+
41+
// Verify that the unmatched open parenthesis is tokenized as a normal parenthesis.
42+
$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
43+
$token = $tokens[$openPtr];
44+
45+
$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
46+
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');
47+
48+
// Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration
49+
// is not recognized as a valid type declaration.
50+
$intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&');
51+
$token = $tokens[$intersectPtr];
52+
53+
$this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)');
54+
$this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)');
55+
56+
}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
57+
58+
59+
/**
60+
* Data provider.
61+
*
62+
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
63+
*
64+
* @return array<string, array<string, string>>
65+
*/
66+
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
67+
{
68+
return [
69+
'OO const type' => ['/* testBrokenConstDNFTypeParensMissingClose */'],
70+
'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingClose */'],
71+
'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingClose */'],
72+
'Return type' => ['/* testBrokenReturnDNFTypeParensMissingClose */'],
73+
];
74+
75+
}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingCloseParens()
76+
77+
78+
/**
79+
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis.
80+
*
81+
* @param string $testMarker The comment prefacing the target token.
82+
*
83+
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens
84+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
85+
*
86+
* @return void
87+
*/
88+
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens($testMarker)
89+
{
90+
$tokens = $this->phpcsFile->getTokens();
91+
92+
// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
93+
// is not recognized as a valid type declaration.
94+
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
95+
$token = $tokens[$unionPtr];
96+
97+
$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
98+
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');
99+
100+
// Verify that the unmatched open parenthesis is tokenized as a normal parenthesis.
101+
$closePtr = $this->getTargetToken($testMarker, [T_CLOSE_PARENTHESIS, T_TYPE_CLOSE_PARENTHESIS], ')');
102+
$token = $tokens[$closePtr];
103+
104+
$this->assertSame(T_CLOSE_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (code)');
105+
$this->assertSame('T_CLOSE_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_CLOSE_PARENTHESIS (type)');
106+
107+
// Verify that the type intersection is still tokenized as T_BITWISE_AND as the type declaration
108+
// is not recognized as a valid type declaration.
109+
$intersectPtr = $this->getTargetToken($testMarker, [T_BITWISE_AND, T_TYPE_INTERSECTION], '&');
110+
$token = $tokens[$intersectPtr];
111+
112+
$this->assertSame(T_BITWISE_AND, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (code)');
113+
$this->assertSame('T_BITWISE_AND', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_AND (type)');
114+
115+
}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
116+
117+
118+
/**
119+
* Data provider.
120+
*
121+
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
122+
*
123+
* @return array<string, array<string, string>>
124+
*/
125+
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
126+
{
127+
return [
128+
'OO const type' => ['/* testBrokenConstDNFTypeParensMissingOpen */'],
129+
'OO property type' => ['/* testBrokenPropertyDNFTypeParensMissingOpen */'],
130+
'Parameter type' => ['/* testBrokenParamDNFTypeParensMissingOpen */'],
131+
'Return type' => ['/* testBrokenReturnDNFTypeParensMissingOpen */'],
132+
];
133+
134+
}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMissingOpenParens()
135+
136+
137+
/**
138+
* Document handling for a DNF type / parse error where the type declaration contains an unmatched parenthesis,
139+
* but also contains a set of matched parentheses.
140+
*
141+
* @param string $testMarker The comment prefacing the target token.
142+
*
143+
* @dataProvider dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched
144+
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
145+
*
146+
* @return void
147+
*/
148+
public function testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched($testMarker)
149+
{
150+
$tokens = $this->phpcsFile->getTokens();
151+
$startPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
152+
153+
for ($i = $startPtr; $i < $this->phpcsFile->numTokens; $i++) {
154+
if (isset(Tokens::$emptyTokens[$tokens[$i]['code']]) === true) {
155+
continue;
156+
}
157+
158+
if ($tokens[$i]['code'] === T_EQUAL
159+
|| $tokens[$i]['code'] === T_VARIABLE
160+
|| $tokens[$i]['code'] === T_OPEN_CURLY_BRACKET
161+
) {
162+
// Reached the end of the type.
163+
break;
164+
}
165+
166+
$errorPrefix = 'Token tokenized as '.$tokens[$i]['type'];
167+
168+
// Verify that type tokens have not been retokenized to `T_TYPE_*` tokens for broken type declarations.
169+
switch ($tokens[$i]['content']) {
170+
case '|':
171+
$this->assertSame(T_BITWISE_OR, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_OR (code)');
172+
$this->assertSame('T_BITWISE_OR', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_OR (type)');
173+
break;
174+
175+
case '&':
176+
$this->assertSame(T_BITWISE_AND, $tokens[$i]['code'], $errorPrefix.', not T_BITWISE_AND (code)');
177+
$this->assertSame('T_BITWISE_AND', $tokens[$i]['type'], $errorPrefix.', not T_BITWISE_AND (type)');
178+
break;
179+
180+
case '(':
181+
// Verify that the open parenthesis is tokenized as a normal parenthesis.
182+
$this->assertSame(T_OPEN_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_OPEN_PARENTHESIS (code)');
183+
$this->assertSame('T_OPEN_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_OPEN_PARENTHESIS (type)');
184+
break;
185+
186+
case ')':
187+
$this->assertSame(T_CLOSE_PARENTHESIS, $tokens[$i]['code'], $errorPrefix.', not T_CLOSE_PARENTHESIS (code)');
188+
$this->assertSame('T_CLOSE_PARENTHESIS', $tokens[$i]['type'], $errorPrefix.', not T_CLOSE_PARENTHESIS (type)');
189+
break;
190+
191+
default:
192+
break;
193+
}//end switch
194+
}//end for
195+
196+
}//end testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
197+
198+
199+
/**
200+
* Data provider.
201+
*
202+
* @see testBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
203+
*
204+
* @return array<string, array<string, string>>
205+
*/
206+
public static function dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
207+
{
208+
return [
209+
'OO const type - missing one close parenthesis' => ['/* testBrokenConstDNFTypeParensMissingOneClose */'],
210+
'OO property type - missing one open parenthesis' => ['/* testBrokenPropertyDNFTypeParensMissingOneOpen */'],
211+
'Parameter type - missing one close parenthesis' => ['/* testBrokenParamDNFTypeParensMissingOneClose */'],
212+
'Return type - missing one open parenthesis' => ['/* testBrokenReturnDNFTypeParensMissingOneOpen */'],
213+
];
214+
215+
}//end dataBrokenDNFTypeParensShouldAlwaysBeAPairMatchedAndUnmatched()
216+
217+
218+
}//end class

0 commit comments

Comments
 (0)
Please sign in to comment.