Skip to content

Commit bd1a196

Browse files
authored
Fix empty regex and empty alternation parse
1 parent f5627dc commit bd1a196

File tree

5 files changed

+142
-17
lines changed

5 files changed

+142
-17
lines changed

Diff for: resources/RegexGrammar.pp

+7-5
Original file line number | Diff line number | Diff line change
@@ -135,7 +135,7 @@
135135
alternation()
136136

137137
alternation:
138-
concatenation() ( ::alternation:: concatenation() #alternation )*
138+
concatenation()? ( <alternation> concatenation()? #alternation )*
139139

140140
concatenation:
141141
( internal_options() | assertion() | quantification() | condition() )
@@ -154,8 +154,8 @@
154154
<index>
155155
| ::assertion_reference_:: alternation() #assertioncondition
156156
)
157-
::_capturing:: concatenation()?
158-
( ::alternation:: concatenation()? )?
157+
::_capturing::
158+
alternation()
159159
::_capturing::
160160

161161
assertion:
@@ -165,7 +165,8 @@
165165
| ::lookbehind_:: #lookbehind
166166
| ::negative_lookbehind_:: #negativelookbehind
167167
)
168-
alternation() ::_capturing::
168+
alternation()
169+
::_capturing::
169170

170171
quantification:
171172
( class() | simple() ) ( quantifier() #quantification )?
@@ -208,7 +209,8 @@
208209
| ::atomic_group_:: #atomicgroup
209210
| ::capturing_::
210211
)
211-
alternation() ::_capturing::
212+
alternation()
213+
::_capturing::
212214

213215
non_capturing_internal_options:
214216
<non_capturing_internal_option>

Diff for: src/Command/IgnoredRegexValidator.php

+8-12
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,6 @@
1212
use PHPStan\Type\ObjectType;
1313
use PHPStan\Type\VerbosityLevel;
1414
use function count;
15-
use function str_contains;
16-
use function str_starts_with;
1715
use function strrpos;
1816
use function substr;
1917

@@ -34,19 +32,17 @@ public function validate(string $regex): IgnoredRegexValidatorResult
3432
try {
3533
/** @var TreeNode $ast */
3634
$ast = $this->parser->parse($regex);
37-
} catch (Exception $e) {
38-
if (str_starts_with($e->getMessage(), 'Unexpected token "|" (alternation) at line 1')) {
39-
return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|');
40-
}
41-
if (
42-
str_contains($regex, '()')
43-
&& str_starts_with($e->getMessage(), 'Unexpected token ")" (_capturing) at line 1')
44-
) {
45-
return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)');
46-
}
35+
} catch (Exception) {
4736
return new IgnoredRegexValidatorResult([], false, false);
4837
}
4938

39+
if (Strings::match($regex, '~(?<!\\\\)(?:\\\\\\\\)*\|\|~')) {
40+
return new IgnoredRegexValidatorResult([], false, true, '||', '\|\|');
41+
}
42+
if (Strings::match($regex, '~(?<!\\\\)(?:\\\\\\\\)*\(\)~')) {
43+
return new IgnoredRegexValidatorResult([], false, true, '()', '\(\)');
44+
}
45+
5046
return new IgnoredRegexValidatorResult(
5147
$this->getIgnoredTypes($ast),
5248
$this->hasAnchorsInTheMiddle($ast),

Diff for: src/Type/Regex/RegexGroupParser.php

+49
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
use PHPStan\Type\StringType;
2121
use PHPStan\Type\Type;
2222
use PHPStan\Type\TypeCombinator;
23+
use function array_values;
2324
use function count;
2425
use function in_array;
2526
use function is_int;
@@ -84,6 +85,9 @@ public function parseGroups(string $regex): ?array
8485
return null;
8586
}
8687

88+
$this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($ast);
89+
$this->updateCapturingAstAddEmptyToken($ast);
90+
8791
$captureOnlyNamed = false;
8892
if ($this->phpVersion->supportsPregCaptureOnlyNamedGroups()) {
8993
$captureOnlyNamed = str_contains($modifiers, 'n');
@@ -104,6 +108,51 @@ public function parseGroups(string $regex): ?array
104108
return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()];
105109
}
106110

111+
/**
 * Builds a `token` tree node that describes a `literal` token with an empty
 * value in the `default` namespace.
 *
 * The node has no children and receives $parentAst as its parent.
 *
 * @param TreeNode $parentAst Node the new token is attached to as a child.
 *
 * @return TreeNode The freshly created empty-literal token node.
 */
private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode
{
	$emptyLiteral = [
		'token' => 'literal',
		'value' => '',
		'namespace' => 'default',
	];

	return new TreeNode('token', $emptyLiteral, [], $parentAst);
}
115+
116+
/**
 * Normalises every `#alternation` node in the tree rooted at $ast, in place.
 *
 * Children are processed recursively first. For a node whose id is
 * `#alternation`, each child whose value token is `alternation` (the
 * vertical bar) is dropped. An empty literal token is put in the bar's place
 * when the bar is the first child, the last child, or is directly followed by
 * another bar - i.e. when one side of the bar has no content. All other
 * children are kept untouched and in their original order.
 *
 * Nodes that are not `#alternation` keep all of their children; they are only
 * re-set as a re-indexed list.
 *
 * @param TreeNode $ast Root of the (sub)tree to rewrite.
 */
private function updateAlternationAstRemoveVerticalBarsAndAddEmptyToken(TreeNode $ast): void
{
	$isAlternation = $ast->getId() === '#alternation';
	$children = $ast->getChildren();

	// The result is collected in a new list instead of unset()-ing and then
	// re-assigning the same key: PHP arrays keep insertion order, so a key that
	// is re-assigned after unset() is appended at the end and the placeholder
	// would end up behind its later siblings.
	$rewritten = [];

	foreach ($children as $i => $child) {
		$this->updateAlternationAstRemoveVerticalBarsAndAddEmptyToken($child);

		if (!$isAlternation || $child->getValueToken() !== 'alternation') {
			$rewritten[] = $child;
			continue;
		}

		// A bar that is not leading and is followed by a real alternative
		// needs no placeholder; it is simply dropped.
		$next = $children[$i + 1] ?? null;
		if ($i !== 0 && $next !== null && $next->getValueToken() !== 'alternation') {
			continue;
		}

		$rewritten[] = $this->createEmptyTokenTreeNode($ast);
	}

	$ast->setChildren($rewritten);
}
140+
141+
/**
 * Gives every childless `#capturing` node in the tree rooted at $ast a body.
 *
 * Descendants are visited first. A node whose id is `#capturing` and that has
 * no children then receives a single `#alternation` child, which in turn holds
 * one empty literal token. Every other node is left as it is.
 *
 * @param TreeNode $ast Root of the (sub)tree to rewrite in place.
 */
private function updateCapturingAstAddEmptyToken(TreeNode $ast): void
{
	foreach ($ast->getChildren() as $descendant) {
		$this->updateCapturingAstAddEmptyToken($descendant);
	}

	$isEmptyCapturingGroup = $ast->getId() === '#capturing' && $ast->getChildren() === [];
	if (!$isEmptyCapturingGroup) {
		return;
	}

	// Wrap the placeholder token in its own alternation node before attaching it.
	$placeholderAlternation = new TreeNode('#alternation', null, [], $ast);
	$placeholderAlternation->setChildren([
		$this->createEmptyTokenTreeNode($placeholderAlternation),
	]);

	$ast->setChildren([$placeholderAlternation]);
}
155+
107156
private function walkRegexAst(
108157
TreeNode $ast,
109158
?RegexAlternation $alternation,

Diff for: tests/PHPStan/Analyser/nsrt/preg_match_shapes.php

+42
Original file line number | Diff line number | Diff line change
@@ -901,6 +901,48 @@ function bugUnescapedDashAfterRange (string $string): void
901901
}
902902
}
903903

904+
/**
 * Fixture for regexes that contain empty subexpressions: an empty pattern,
 * an empty capturing group, and alternations with one or more empty branches.
 *
 * Each block calls preg_match() and asserts the array shape expected for
 * $matches inside the matching branch. The patterns and expected type strings
 * are the test data, so they must not be reformatted.
 * NOTE(review): assertType() is presumably PHPStan's type-assertion test helper - confirm.
 */
function bugEmptySubexpression (string $string): void {
905+
if (preg_match('//', $string, $matches)) {
906+
assertType("array{string}", $matches); // could be array{''}
907+
}
908+
909+
if (preg_match('/()/', $string, $matches)) {
910+
assertType("array{string, ''}", $matches); // could be array{'', ''}
911+
}
912+
913+
if (preg_match('/|/', $string, $matches)) {
914+
assertType("array{string}", $matches); // could be array{''}
915+
}
916+
917+
// Top-level alternations with an empty leading, trailing or middle branch.
if (preg_match('~|(a)~', $string, $matches)) {
918+
assertType("array{0: string, 1?: 'a'}", $matches);
919+
}
920+
921+
if (preg_match('~(a)|~', $string, $matches)) {
922+
assertType("array{0: string, 1?: 'a'}", $matches);
923+
}
924+
925+
if (preg_match('~(a)||(b)~', $string, $matches)) {
926+
assertType("array{0: string, 1?: 'a'}|array{string, '', 'b'}", $matches);
927+
}
928+
929+
// The same alternations, wrapped in an outer capturing group.
if (preg_match('~(|(a))~', $string, $matches)) {
930+
assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches);
931+
}
932+
933+
if (preg_match('~((a)|)~', $string, $matches)) {
934+
assertType("array{0: string, 1: ''|'a', 2?: 'a'}", $matches);
935+
}
936+
937+
if (preg_match('~((a)||(b))~', $string, $matches)) {
938+
assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: 'b'}", $matches);
939+
}
940+
941+
// Middle branch is an explicit empty capturing group instead of an empty branch.
if (preg_match('~((a)|()|(b))~', $string, $matches)) {
942+
assertType("array{0: string, 1: ''|'a'|'b', 2?: ''|'a', 3?: '', 4?: 'b'}", $matches);
943+
}
944+
}
945+
904946
function bug11744(string $string): void
905947
{
906948
if (!preg_match('~^((/[a-z]+)?)~', $string, $matches)) {

Diff for: tests/PHPStan/Command/IgnoredRegexValidatorTest.php

+36
Original file line number | Diff line number | Diff line change
@@ -100,12 +100,48 @@ public function dataValidate(): array
100100
false,
101101
false,
102102
],
103+
[
104+
'~(a\()~',
105+
[],
106+
false,
107+
false,
108+
],
109+
[
110+
'~b\\\()~',
111+
[],
112+
false,
113+
true,
114+
],
115+
[
116+
'~(c\\\\\()~',
117+
[],
118+
false,
119+
false,
120+
],
103121
[
104122
'~Result of || is always true.~',
105123
[],
106124
false,
107125
true,
108126
],
127+
[
128+
'~a\||~',
129+
[],
130+
false,
131+
false,
132+
],
133+
[
134+
'~b\\\||~',
135+
[],
136+
false,
137+
true,
138+
],
139+
[
140+
'~c\\\\\||~',
141+
[],
142+
false,
143+
false,
144+
],
109145
[
110146
'#Method PragmaRX\Notified\Data\Repositories\Notified::firstOrCreateByEvent() should return PragmaRX\Notified\Data\Models\Notified but returns Illuminate\Database\Eloquent\Model|null#',
111147
[],

0 commit comments

Comments
 (0)