Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tokenizer/PHP: add extra hardening to the (DNF) type handling + efficiency improvement #508

Merged
merged 4 commits into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions src/Tokenizers/PHP.php
Original file line number Diff line number Diff line change
Expand Up @@ -2602,7 +2602,9 @@ protected function processAdditional()

$this->createAttributesNestingMap();

$numTokens = count($this->tokens);
$numTokens = count($this->tokens);
$lastSeenTypeToken = $numTokens;

for ($i = ($numTokens - 1); $i >= 0; $i--) {
// Check for any unset scope conditions due to alternate IF/ENDIF syntax.
if (isset($this->tokens[$i]['scope_opener']) === true
Expand Down Expand Up @@ -3036,9 +3038,14 @@ protected function processAdditional()
continue;
} else if ($this->tokens[$i]['code'] === T_BITWISE_OR
|| $this->tokens[$i]['code'] === T_BITWISE_AND
|| $this->tokens[$i]['code'] === T_OPEN_PARENTHESIS
|| $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS
) {
if ($lastSeenTypeToken < $i) {
// We've already examined this code to check if it is a type declaration and concluded it wasn't.
// No need to do it again.
continue;
}

/*
Convert "|" to T_TYPE_UNION or leave as T_BITWISE_OR.
Convert "&" to T_TYPE_INTERSECTION or leave as T_BITWISE_AND.
Expand Down Expand Up @@ -3133,9 +3140,14 @@ protected function processAdditional()

$typeTokenCountBefore = 0;
$typeOperators = [$i];
$parenthesesCount = 0;
$confirmed = false;
$maybeNullable = null;

if ($this->tokens[$i]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
++$parenthesesCount;
}

for ($x = ($i - 1); $x >= 0; $x--) {
if (isset(Tokens::$emptyTokens[$this->tokens[$x]['code']]) === true) {
continue;
Expand Down Expand Up @@ -3167,7 +3179,7 @@ protected function processAdditional()
$confirmed = true;
break;
} else {
// This may still be an arrow function which hasn't be handled yet.
// This may still be an arrow function which hasn't been handled yet.
for ($y = ($x - 1); $y > 0; $y--) {
if (isset(Tokens::$emptyTokens[$this->tokens[$y]['code']]) === false
&& $this->tokens[$y]['code'] !== T_BITWISE_AND
Expand Down Expand Up @@ -3202,11 +3214,13 @@ protected function processAdditional()
continue;
}

if ($this->tokens[$x]['code'] === T_BITWISE_OR
|| $this->tokens[$x]['code'] === T_BITWISE_AND
|| $this->tokens[$x]['code'] === T_OPEN_PARENTHESIS
|| $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS
) {
if ($this->tokens[$x]['code'] === T_BITWISE_OR || $this->tokens[$x]['code'] === T_BITWISE_AND) {
$typeOperators[] = $x;
continue;
}

if ($this->tokens[$x]['code'] === T_OPEN_PARENTHESIS || $this->tokens[$x]['code'] === T_CLOSE_PARENTHESIS) {
++$parenthesesCount;
$typeOperators[] = $x;
continue;
}
Expand Down Expand Up @@ -3244,6 +3258,9 @@ protected function processAdditional()
break;
}//end for

// Remember the last token we examined as part of the (non-)"type declaration".
$lastSeenTypeToken = $x;

if ($confirmed === false
&& $suspectedType === 'property or parameter'
&& isset($this->tokens[$i]['nested_parenthesis']) === true
Expand Down Expand Up @@ -3288,8 +3305,8 @@ protected function processAdditional()
unset($parens, $last);
}//end if

if ($confirmed === false) {
// Not a union or intersection type after all, move on.
if ($confirmed === false || ($parenthesesCount % 2) !== 0) {
// Not a (valid) union, intersection or DNF type after all, move on.
continue;
}

Expand Down
17 changes: 17 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError1Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?php

// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
class ParseErrors {
/* testBrokenConstDNFTypeEndOnOpenParenthesis */
const A|(B PARSE_ERROR = null;

/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */
public A|(B $parseError;

function unmatchedParens {
/* testBrokenParamDNFTypeEndOnOpenParenthesis */
A|(B $parseError,
/* testBrokenReturnDNFTypeEndOnOpenParenthesis */
) : A|(B {}
}
69 changes: 69 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError1Test.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
<?php
/**
* Tests that parentheses tokens are not converted to type parentheses tokens in broken DNF types.
*
* @author Juliette Reinders Folmer <[email protected]>
* @copyright 2024 PHPCSStandards and contributors
* @license https://github.com/PHPCSStandards/PHP_CodeSniffer/blob/master/licence.txt BSD Licence
*/

namespace PHP_CodeSniffer\Tests\Core\Tokenizer\PHP;

use PHP_CodeSniffer\Tests\Core\Tokenizer\AbstractTokenizerTestCase;

final class DNFTypesParseError1Test extends AbstractTokenizerTestCase
{


/**
* Document handling for a DNF type / parse error where the last significant type specific token is an open parenthesis.
*
* @param string $testMarker The comment prefacing the target token.
*
* @dataProvider dataBrokenDNFTypeCantEndOnOpenParenthesis
* @covers PHP_CodeSniffer\Tokenizers\PHP::processAdditional
*
* @return void
*/
public function testBrokenDNFTypeCantEndOnOpenParenthesis($testMarker)
{
$tokens = $this->phpcsFile->getTokens();

$openPtr = $this->getTargetToken($testMarker, [T_OPEN_PARENTHESIS, T_TYPE_OPEN_PARENTHESIS], '(');
$token = $tokens[$openPtr];

// Verify that the open parenthesis is tokenized as a normal parenthesis.
$this->assertSame(T_OPEN_PARENTHESIS, $token['code'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (code)');
$this->assertSame('T_OPEN_PARENTHESIS', $token['type'], 'Token tokenized as '.$token['type'].', not T_OPEN_PARENTHESIS (type)');

// Verify that the type union is still tokenized as T_BITWISE_OR as the type declaration
// is not recognized as a valid type declaration.
$unionPtr = $this->getTargetToken($testMarker, [T_BITWISE_OR, T_TYPE_UNION], '|');
$token = $tokens[$unionPtr];

$this->assertSame(T_BITWISE_OR, $token['code'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (code)');
$this->assertSame('T_BITWISE_OR', $token['type'], 'Token tokenized as '.$token['type'].', not T_BITWISE_OR (type)');

}//end testBrokenDNFTypeCantEndOnOpenParenthesis()


/**
* Data provider.
*
* @see testBrokenDNFTypeCantEndOnOpenParenthesis()
*
* @return array<string, array<string, string>>
*/
public static function dataBrokenDNFTypeCantEndOnOpenParenthesis()
{
return [
'OO const type' => ['/* testBrokenConstDNFTypeEndOnOpenParenthesis */'],
'OO property type' => ['/* testBrokenPropertyDNFTypeEndOnOpenParenthesis */'],
'Parameter type' => ['/* testBrokenParamDNFTypeEndOnOpenParenthesis */'],
'Return type' => ['/* testBrokenReturnDNFTypeEndOnOpenParenthesis */'],
];

}//end dataBrokenDNFTypeCantEndOnOpenParenthesis()


}//end class
48 changes: 48 additions & 0 deletions tests/Core/Tokenizer/PHP/DNFTypesParseError2Test.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
<?php

// Parentheses in broken DNF type declarations will remain tokenized as normal parentheses.
// This test is in a separate file as the 'nested_parenthesis' indexes will be off after this code.
//
// Also note that the order of these tests is deliberate to try and trick the parentheses handling
// in the Tokenizer class into matching parentheses pairs, even though the parentheses do
// no belong together.

class UnmatchedParentheses {
/* testBrokenConstDNFTypeParensMissingClose */
const A|(B&C PARSE_ERROR_1 = null;

/* testBrokenConstDNFTypeParensMissingOpen */
const A|B&C) PARSE_ERROR_2 = null;

/* testBrokenPropertyDNFTypeParensMissingClose */
private A|(B&C $parseError1;

/* testBrokenPropertyDNFTypeParensMissingOpen */
protected A|B&C) $parseError2;

function unmatchedParens1 (
/* testBrokenParamDNFTypeParensMissingClose */
A|(B&C $parseError,
/* testBrokenReturnDNFTypeParensMissingOpen */
) : A|B&C) {}

function unmatchedParens2 (
/* testBrokenParamDNFTypeParensMissingOpen */
A|B&C) $parseError
/* testBrokenReturnDNFTypeParensMissingClose */
) : A|(B&C {}
}

class MatchedAndUnmatchedParentheses {
/* testBrokenConstDNFTypeParensMissingOneClose */
const (A&B)|(B&C PARSE_ERROR = null;

/* testBrokenPropertyDNFTypeParensMissingOneOpen */
protected (A&B)|B&C) $parseError;

function unmatchedParens (
/* testBrokenParamDNFTypeParensMissingOneClose */
(A&B)|(B&C $parseError,
/* testBrokenReturnDNFTypeParensMissingOneOpen */
) : (A&B)|B&C) {}
}
Loading