Skip to content

RegexArrayShapeMatcher - more precise subject types #3897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 17 commits into from
Mar 24, 2025
35 changes: 26 additions & 9 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,17 @@ private function matchPatternType(Type $patternType, ?Type $flagsType, TrinaryLo
*/
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched, bool $matchesAll): ?Type
{
$parseResult = $this->regexGroupParser->parseGroups($regex);
if ($parseResult === null) {
$astWalkResult = $this->regexGroupParser->parseGroups($regex);
if ($astWalkResult === null) {
// regex could not be parsed by Hoa/Regex
return null;
}
[$groupList, $markVerbs] = $parseResult;
$groupList = $astWalkResult->getCapturingGroups();
$markVerbs = $astWalkResult->getMarkVerbs();
$subjectBaseType = new StringType();
if ($wasMatched->yes()) {
$subjectBaseType = $astWalkResult->getSubjectBaseType();
}

$regexGroupList = new RegexGroupList($groupList);
$trailingOptionals = $regexGroupList->countTrailingOptionals();
Expand All @@ -130,6 +135,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
$regexGroupList = $regexGroupList->forceGroupNonOptional($onlyOptionalTopLevelGroup);

$combiType = $this->buildArrayType(
$subjectBaseType,
$regexGroupList,
$wasMatched,
$trailingOptionals,
Expand All @@ -141,7 +147,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
if (!$this->containsUnmatchedAsNull($flags, $matchesAll)) {
// positive match has a subject but not any capturing group
$combiType = TypeCombinator::union(
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll)], [1], [], TrinaryLogic::createYes()),
$combiType,
);
}
Expand Down Expand Up @@ -180,6 +186,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
}

$combiType = $this->buildArrayType(
$subjectBaseType,
$comboList,
$wasMatched,
$trailingOptionals,
Expand All @@ -199,7 +206,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
)
) {
// positive match has a subject but not any capturing group
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
$combiTypes[] = new ConstantArrayType([new ConstantIntegerType(0)], [$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll)], [1], [], TrinaryLogic::createYes());
}

return TypeCombinator::union(...$combiTypes);
Expand All @@ -208,6 +215,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
// the general case, which should work in all cases but does not yield the most
// precise result possible in some cases
return $this->buildArrayType(
$subjectBaseType,
$regexGroupList,
$wasMatched,
$trailingOptionals,
Expand All @@ -221,6 +229,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
* @param list<string> $markVerbs
*/
private function buildArrayType(
Type $subjectBaseType,
RegexGroupList $captureGroups,
TrinaryLogic $wasMatched,
int $trailingOptionals,
Expand All @@ -234,7 +243,7 @@ private function buildArrayType(
// first item in matches contains the overall match.
$builder->setOffsetValueType(
$this->getKeyType(0),
$this->createSubjectValueType($flags, $matchesAll),
$this->createSubjectValueType($subjectBaseType, $flags, $matchesAll),
$this->isSubjectOptional($wasMatched, $matchesAll),
);

Expand Down Expand Up @@ -298,13 +307,21 @@ private function isSubjectOptional(TrinaryLogic $wasMatched, bool $matchesAll):
return !$wasMatched->yes();
}

private function createSubjectValueType(int $flags, bool $matchesAll): Type
/**
* @param Type $baseType A string type (or string variant) representing the subject of the match
*/
private function createSubjectValueType(Type $baseType, int $flags, bool $matchesAll): Type
{
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));
$subjectValueType = TypeCombinator::removeNull($this->getValueType($baseType, $flags, $matchesAll));

if ($matchesAll) {
$subjectValueType = TypeCombinator::removeNull($this->getValueType(new StringType(), $flags, $matchesAll));

if ($this->containsPatternOrder($flags)) {
$subjectValueType = TypeCombinator::intersect(new ArrayType(new IntegerType(), $subjectValueType), new AccessoryArrayListType());
$subjectValueType = TypeCombinator::intersect(
new ArrayType(new IntegerType(), $subjectValueType),
new AccessoryArrayListType(),
);
}
}

Expand Down
25 changes: 25 additions & 0 deletions src/Type/Regex/RegexAstWalkResult.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

namespace PHPStan\Type\Regex;

use PHPStan\Type\StringType;
use PHPStan\Type\Type;

/** @immutable */
final class RegexAstWalkResult
{
Expand All @@ -15,6 +18,7 @@ public function __construct(
private int $captureGroupId,
private array $capturingGroups,
private array $markVerbs,
private Type $subjectBaseType,
)
{
}
Expand All @@ -27,6 +31,7 @@ public static function createEmpty(): self
100,
[],
[],
new StringType(),
);
}

Expand All @@ -37,6 +42,7 @@ public function nextAlternationId(): self
$this->captureGroupId,
$this->capturingGroups,
$this->markVerbs,
$this->subjectBaseType,
);
}

Expand All @@ -47,6 +53,7 @@ public function nextCaptureGroupId(): self
$this->captureGroupId + 1,
$this->capturingGroups,
$this->markVerbs,
$this->subjectBaseType,
);
}

Expand All @@ -60,6 +67,7 @@ public function addCapturingGroup(RegexCapturingGroup $group): self
$this->captureGroupId,
$capturingGroups,
$this->markVerbs,
$this->subjectBaseType,
);
}

Expand All @@ -73,6 +81,18 @@ public function markVerb(string $markVerb): self
$this->captureGroupId,
$this->capturingGroups,
$verbs,
$this->subjectBaseType,
);
}

public function withSubjectBaseType(Type $subjectBaseType): self
{
return new self(
$this->alternationId,
$this->captureGroupId,
$this->capturingGroups,
$this->markVerbs,
$subjectBaseType,
);
}

Expand Down Expand Up @@ -102,4 +122,9 @@ public function getMarkVerbs(): array
return $this->markVerbs;
}

public function getSubjectBaseType(): Type
{
return $this->subjectBaseType;
}

}
28 changes: 23 additions & 5 deletions src/Type/Regex/RegexGroupParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,7 @@ public function __construct(
{
}

/**
* @return array{array<int, RegexCapturingGroup>, list<string>}|null
*/
public function parseGroups(string $regex): ?array
public function parseGroups(string $regex): ?RegexAstWalkResult
{
if (self::$parser === null) {
/** @throws void */
Expand Down Expand Up @@ -105,7 +102,28 @@ public function parseGroups(string $regex): ?array
RegexAstWalkResult::createEmpty(),
);

return [$astWalkResult->getCapturingGroups(), $astWalkResult->getMarkVerbs()];
$subjectAsGroupResult = $this->walkGroupAst(
$ast,
false,
false,
$modifiers,
RegexGroupWalkResult::createEmpty(),
);

if (!$subjectAsGroupResult->mightContainEmptyStringLiteral()) {
// we could handle numeric-string, in case we know the regex is delimited by ^ and $
if ($subjectAsGroupResult->isNonFalsy()->yes()) {
$astWalkResult = $astWalkResult->withSubjectBaseType(
TypeCombinator::intersect(new StringType(), new AccessoryNonFalsyStringType()),
);
} elseif ($subjectAsGroupResult->isNonEmpty()->yes()) {
$astWalkResult = $astWalkResult->withSubjectBaseType(
TypeCombinator::intersect(new StringType(), new AccessoryNonEmptyStringType()),
);
}
}

return $astWalkResult;
}

private function createEmptyTokenTreeNode(TreeNode $parentAst): TreeNode
Expand Down
14 changes: 14 additions & 0 deletions src/Type/Regex/RegexGroupWalkResult.php
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@ public function getOnlyLiterals(): ?array
return $this->onlyLiterals;
}

public function mightContainEmptyStringLiteral(): bool
{
if ($this->onlyLiterals === null) {
return false;
}
foreach ($this->onlyLiterals as $onlyLiteral) {
if ($onlyLiteral === '') {
return true;
}
}

return false;
}

public function isNonEmpty(): TrinaryLogic
{
return $this->isNonEmpty;
Expand Down
12 changes: 6 additions & 6 deletions tests/PHPStan/Analyser/nsrt/bug-11293.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,21 @@ class HelloWorld
public function sayHello(string $s): void
{
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) > 0) {
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}

public function sayHello2(string $s): void
{
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) === 1) {
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}

public function sayHello3(string $s): void
{
if (preg_match('/data-(\d{6})\.json$/', $s, $matches) >= 1) {
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}

Expand All @@ -35,7 +35,7 @@ public function sayHello4(string $s): void
return;
}

assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}

public function sayHello5(string $s): void
Expand All @@ -46,7 +46,7 @@ public function sayHello5(string $s): void
return;
}

assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}

public function sayHello6(string $s): void
Expand All @@ -57,6 +57,6 @@ public function sayHello6(string $s): void
return;
}

assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{non-falsy-string, non-falsy-string&numeric-string}', $matches);
}
}
Loading
Loading