Skip to content

Commit

Permalink
Fix empty regex and empty alternation parse
Browse files Browse the repository at this point in the history
  • Loading branch information
mvorisek authored Feb 11, 2025
1 parent f5627dc commit bd1a196
Show file tree
Hide file tree
Showing 5 changed files with 142 additions and 17 deletions.
12 changes: 7 additions & 5 deletions resources/RegexGrammar.pp
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@
alternation()

alternation:
concatenation() ( ::alternation:: concatenation() #alternation )*
concatenation()? ( <alternation> concatenation()? #alternation )*

concatenation:
( internal_options() | assertion() | quantification() | condition() )
Expand All @@ -154,8 +154,8 @@
<index>
| ::assertion_reference_:: alternation() #assertioncondition
)
::_capturing:: concatenation()?
( ::alternation:: concatenation()? )?
::_capturing::
alternation()
::_capturing::

assertion:
Expand All @@ -165,7 +165,8 @@
| ::lookbehind_:: #lookbehind
| ::negative_lookbehind_:: #negativelookbehind
)
alternation() ::_capturing::
alternation()
::_capturing::

quantification:
( class() | simple() ) ( quantifier() #quantification )?
Expand Down Expand Up @@ -208,7 +209,8 @@
| ::atomic_group_:: #atomicgroup
| ::capturing_::
)
alternation() ::_capturing::
alternation()
::_capturing::

non_capturing_internal_options:
<non_capturing_internal_option>
Expand Down
20 changes: 8 additions & 12 deletions src/Command/IgnoredRegexValidator.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
use PHPStan\Type\ObjectType;
use PHPStan\Type\VerbosityLevel;
use function count;
use function str_contains;
use function str_starts_with;
use function strrpos;
use function substr;

Expand All @@ -34,19 +32,17 @@ public function validate(string $regex): IgnoredRegexValidatorResult
try {
/** @var TreeNode $ast */
$ast = $this->parser->parse($regex);
} catch (Exception $e) {
if (str_starts_with($e->getMessage(), 'Unexpected token "|" (alternation) at line 1')) {
return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|');
}
if (
str_contains($regex, '()')
&& str_starts_with($e->getMessage(), 'Unexpected token ")" (_capturing) at line 1')
) {
return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)');
}
} catch (Exception) {
return new IgnoredRegexValidatorResult([], false, false);
}

if (Strings::match($regex, '~(?<!\\\\)(?:\\\\\\\\)*\|\|~')) {
return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|');
}
if (Strings::match($regex, '~(?<!\\\\)(?:\\\\\\\\)*\(\)~')) {
return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)');
}

return new IgnoredRegexValidatorResult(
$this->getIgnoredTypes($ast),
$this->hasAnchorsInTheMiddle($ast),
Expand Down
49 changes: 49 additions & 0 deletions src/Type/Regex/RegexGroupParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use PHPStan\Type\StringType;
use PHPStan\Type\Type;
use PHPStan\Type\TypeCombinator;
use function array_values;
use function count;
use function in_array;
use function is_int;
Expand Down Expand Up @@ -84,6 +85,9 @@ public function parseGroups(string $regex): ?array
return null;
}

$this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($ast);
$this->updateCapturingAstAddEmptyToken($ast);

$captureOnlyNamed = false;
if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) {
$captureOnlyNamed = str_contains($modifiers, 'n');
Expand All @@ -104,6 +108,51 @@ public function parseGroups(string $regex): ?array
return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()];
}

private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode
{
return new TreeNode('token', ['token' => 'literal', 'value' => '', 'namespace' => 'default'], [], $parentAst);
}

private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode $ast): void
{
$children = $ast->getChildren();

foreach ($children as $i => $child) {
$this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($child);

if ($ast->getId() !== '#alternation' || $child->getValueToken() !== 'alternation') {
continue;
}

unset($children[$i]);

if ($i !== 0
&& isset($children[$i + 1])
&& $children[$i + 1]->getValueToken() !== 'alternation') {
continue;
}

$children[$i] = $this->createEmptyTokenTreeNode($ast);
}

$ast->setChildren(array_values($children));
}

private function updateCapturingAstAddEmptyToken(TreeNode $ast): void
{
foreach ($ast->getChildren() as $child) {
$this->updateCapturingAstAddEmptyToken($child);
}

if ($ast->getId() !== '#capturing' || $ast->getChildren() !== []) {
return;
}

$emptyAlternationAst = new TreeNode('#alternation', null, [], $ast);
$emptyAlternationAst->setChildren([$this->createEmptyTokenTreeNode($emptyAlternationAst)]);
$ast->setChildren([$emptyAlternationAst]);
}

private function walkRegexAst(
TreeNode $ast,
?RegexAlternation $alternation,
Expand Down
42 changes: 42 additions & 0 deletions tests/PHPStan/Analyser/nsrt/preg_match_shapes.php
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,48 @@ function bugUnescapedDashAfterRange (string $string): void
}
}

function bugEmptySubexpression (string $string): void {
if (preg_match('//', $string, $matches)) {
assertType("array{string}", $matches); // could be array{''}
}

if (preg_match('/()/', $string, $matches)) {
assertType("array{string, ''}", $matches); // could be array{'', ''}
}

if (preg_match('/|/', $string, $matches)) {
assertType("array{string}", $matches); // could be array{''}
}

if (preg_match('~|(a)~', $string, $matches)) {
assertType("array{0: string, 1?: 'a'}", $matches);
}

if (preg_match('~(a)|~', $string, $matches)) {
assertType("array{0: string, 1?: 'a'}", $matches);
}

if (preg_match('~(a)||(b)~', $string, $matches)) {
assertType("array{0: string, 1?: 'a'}|array{string, '', 'b'}", $matches);
}

if (preg_match('~(|(a))~', $string, $matches)) {
assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches);
}

if (preg_match('~((a)|)~', $string, $matches)) {
assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches);
}

if (preg_match('~((a)||(b))~', $string, $matches)) {
assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: 'b'}", $matches);
}

if (preg_match('~((a)|()|(b))~', $string, $matches)) {
assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: '', 4?: 'b'}", $matches);
}
}

function bug11744(string $string): void
{
if (!preg_match('~^((/[a-z]+)?)~', $string, $matches)) {
Expand Down
36 changes: 36 additions & 0 deletions tests/PHPStan/Command/IgnoredRegexValidatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,48 @@ public function dataValidate(): array
false,
false,
],
[
'~(a\()~',
[],
false,
false,
],
[
'~b\\\()~',
[],
false,
true,
],
[
'~(c\\\\\()~',
[],
false,
false,
],
[
'~Result of || is always true.~',
[],
false,
true,
],
[
'~a\||~',
[],
false,
false,
],
[
'~b\\\||~',
[],
false,
true,
],
[
'~c\\\\\||~',
[],
false,
false,
],
[
'#Method PragmaRX\Notified\Data\Repositories\Notified::firstOrCreateByEvent() should return PragmaRX\Notified\Data\Models\Notified but returns Illuminate\Database\Eloquent\Model|null#',
[],
Expand Down

0 comments on commit bd1a196

Please sign in to comment.