Skip to content

Commit a75cea2

Browse files
authored
Add support for mb_convert_case() and mb_convert_kana()
1 parent 730b952 commit a75cea2

File tree

3 files changed

+90
-26
lines changed

3 files changed

+90
-26
lines changed

src/Analyser/TypeSpecifier.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,7 @@ private function specifyTypesForConstantStringBinaryExpression(
10461046
$context->truthy()
10471047
&& $exprNode instanceof FuncCall
10481048
&& $exprNode->name instanceof Name
1049-
&& in_array(strtolower($exprNode->name->toString()), ['substr', 'strstr', 'stristr', 'strchr', 'strrchr', 'strtolower', 'strtoupper', 'mb_strtolower', 'mb_strtoupper', 'ucfirst', 'lcfirst', 'ucwords'], true)
1049+
&& in_array(strtolower($exprNode->name->toString()), ['substr', 'strstr', 'stristr', 'strchr', 'strrchr', 'strtolower', 'strtoupper', 'mb_strtolower', 'mb_strtoupper', 'ucfirst', 'lcfirst', 'ucwords', 'mb_convert_case', 'mb_convert_kana'], true)
10501050
&& isset($exprNode->getArgs()[0])
10511051
&& $constantType->getValue() !== ''
10521052
) {

src/Type/Php/StrCaseFunctionsReturnTypeExtension.php

+52-18
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,33 @@
1717
use PHPStan\Type\Type;
1818
use PHPStan\Type\TypeCombinator;
1919
use PHPStan\Type\TypeUtils;
20+
use function array_map;
2021
use function count;
2122
use function in_array;
2223
use function is_callable;
24+
use function mb_check_encoding;
2325

2426
class StrCaseFunctionsReturnTypeExtension implements DynamicFunctionReturnTypeExtension
2527
{
2628

29+
/**
30+
* [function name => minimum arity]
31+
*/
32+
private const FUNCTIONS = [
33+
'strtoupper' => 1,
34+
'strtolower' => 1,
35+
'mb_strtoupper' => 1,
36+
'mb_strtolower' => 1,
37+
'lcfirst' => 1,
38+
'ucfirst' => 1,
39+
'ucwords' => 1,
40+
'mb_convert_case' => 2,
41+
'mb_convert_kana' => 1,
42+
];
43+
2744
public function isFunctionSupported(FunctionReflection $functionReflection): bool
2845
{
29-
return in_array($functionReflection->getName(), [
30-
'strtoupper',
31-
'strtolower',
32-
'mb_strtoupper',
33-
'mb_strtolower',
34-
'lcfirst',
35-
'ucfirst',
36-
'ucwords',
37-
], true);
46+
return isset(self::FUNCTIONS[$functionReflection->getName()]);
3847
}
3948

4049
public function getTypeFromFunctionCall(
@@ -43,27 +52,52 @@ public function getTypeFromFunctionCall(
4352
Scope $scope,
4453
): Type
4554
{
55+
$fnName = $functionReflection->getName();
4656
$args = $functionCall->getArgs();
47-
if (count($args) < 1) {
57+
58+
if (count($args) < self::FUNCTIONS[$fnName]) {
4859
return ParametersAcceptorSelector::selectSingle($functionReflection->getVariants())->getReturnType();
4960
}
5061

5162
$argType = $scope->getType($args[0]->value);
52-
$fnName = $functionReflection->getName();
5363
if (!is_callable($fnName)) {
5464
throw new ShouldNotHappenException();
5565
}
5666

57-
if (count($args) === 1) {
58-
$constantStrings = TypeUtils::getConstantStrings($argType);
59-
if (count($constantStrings) > 0) {
60-
$strings = [];
67+
$modes = [];
68+
if ($fnName === 'mb_convert_case') {
69+
$modeType = $scope->getType($args[1]->value);
70+
$modes = array_map(static fn ($mode) => $mode->getValue(), TypeUtils::getConstantIntegers($modeType));
71+
} elseif (in_array($fnName, ['ucwords', 'mb_convert_kana'], true)) {
72+
if (count($args) >= 2) {
73+
$modeType = $scope->getType($args[1]->value);
74+
$modes = array_map(static fn ($mode) => $mode->getValue(), TypeUtils::getConstantStrings($modeType));
75+
} else {
76+
$modes = $fnName === 'mb_convert_kana' ? ['KV'] : [" \t\r\n\f\v"];
77+
}
78+
}
79+
80+
$constantStrings = array_map(static fn ($type) => $type->getValue(), TypeUtils::getConstantStrings($argType));
81+
if (count($constantStrings) > 0 && mb_check_encoding($constantStrings, 'UTF-8')) {
82+
$strings = [];
6183

62-
foreach ($constantStrings as $constantString) {
63-
$strings[] = new ConstantStringType($fnName($constantString->getValue()));
84+
$parameters = [];
85+
if (in_array($fnName, ['ucwords', 'mb_convert_case', 'mb_convert_kana'], true)) {
86+
foreach ($modes as $mode) {
87+
foreach ($constantStrings as $constantString) {
88+
$parameters[] = [$constantString, $mode];
89+
}
6490
}
91+
} else {
92+
$parameters = array_map(static fn ($s) => [$s], $constantStrings);
93+
}
94+
95+
foreach ($parameters as $parameter) {
96+
$strings[] = $fnName(...$parameter);
97+
}
6598

66-
return TypeCombinator::union(...$strings);
99+
if (count($strings) !== 0 && mb_check_encoding($strings, 'UTF-8')) {
100+
return TypeCombinator::union(...array_map(static fn ($s) => new ConstantStringType($s), $strings));
67101
}
68102
}
69103

tests/PHPStan/Analyser/data/str-casing.php

+37-7
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,32 @@ class Foo {
1010
* @param non-empty-string $nonE
1111
* @param literal-string $literal
1212
* @param 'foo'|'Foo' $edgeUnion
13+
* @param MB_CASE_UPPER|MB_CASE_LOWER|MB_CASE_TITLE|MB_CASE_FOLD|MB_CASE_UPPER_SIMPLE|MB_CASE_LOWER_SIMPLE|MB_CASE_TITLE_SIMPLE|MB_CASE_FOLD_SIMPLE $caseMode
14+
* @param 'aKV'|'hA'|'AH'|'K'|'KV'|'RNKV' $kanaMode
15+
* @param mixed $mixed
1316
*/
14-
public function bar($numericS, $nonE, $literal, $edgeUnion) {
17+
public function bar($numericS, $nonE, $literal, $edgeUnion, $caseMode, $kanaMode, $mixed) {
1518
assertType("'abc'", strtolower('ABC'));
1619
assertType("'ABC'", strtoupper('abc'));
1720
assertType("'abc'", mb_strtolower('ABC'));
1821
assertType("'ABC'", mb_strtoupper('abc'));
22+
assertType("'abc'", mb_strtolower('ABC', 'UTF-8'));
23+
assertType("'ABC'", mb_strtoupper('abc', 'UTF-8'));
24+
assertType("'abc'", mb_strtolower('Abc'));
25+
assertType("'ABC'", mb_strtoupper('Abc'));
1926
assertType("'aBC'", lcfirst('ABC'));
2027
assertType("'Abc'", ucfirst('abc'));
2128
assertType("'Hello World'", ucwords('hello world'));
29+
assertType("'Hello|World'", ucwords('hello|world', "|"));
30+
assertType("'ČESKÁ REPUBLIKA'", mb_convert_case('Česká republika', MB_CASE_UPPER));
31+
assertType("'česká republika'", mb_convert_case('Česká republika', MB_CASE_LOWER));
32+
assertType("non-falsy-string", mb_convert_case('Česká republika', $mixed));
33+
assertType("'ČESKÁ REPUBLIKA'|'Česká Republika'|'česká republika'", mb_convert_case('Česká republika', $caseMode));
34+
assertType("'Abc123アイウガギグばびぶ漢字'", mb_convert_kana('Abc123アイウガギグばびぶ漢字'));
35+
assertType("'Abc123アイウガギグばびぶ漢字'", mb_convert_kana('Abc123アイウガギグばびぶ漢字', 'aKV'));
36+
assertType("'Abc123アイウガギグバビブ漢字'", mb_convert_kana('Abc123アイウガギグばびぶ漢字', 'hA'));
37+
assertType("'Abc123アガば漢'|'Abc123あか゛ば漢'|'Abc123アカ゛ば漢'|'Abc123アガば漢'|'Abc123アガバ漢'", mb_convert_kana('Abc123アガば漢', $kanaMode));
38+
assertType("non-falsy-string", mb_convert_kana('Abc123アガば漢', $mixed));
2239

2340
assertType("numeric-string", strtolower($numericS));
2441
assertType("numeric-string", strtoupper($numericS));
@@ -27,6 +44,11 @@ public function bar($numericS, $nonE, $literal, $edgeUnion) {
2744
assertType("numeric-string", lcfirst($numericS));
2845
assertType("numeric-string", ucfirst($numericS));
2946
assertType("numeric-string", ucwords($numericS));
47+
assertType("numeric-string", mb_convert_case($numericS, MB_CASE_UPPER));
48+
assertType("numeric-string", mb_convert_case($numericS, MB_CASE_LOWER));
49+
assertType("numeric-string", mb_convert_case($numericS, $mixed));
50+
assertType("numeric-string", mb_convert_kana($numericS));
51+
assertType("numeric-string", mb_convert_kana($numericS, $mixed));
3052

3153
assertType("non-empty-string", strtolower($nonE));
3254
assertType("non-empty-string", strtoupper($nonE));
@@ -35,6 +57,11 @@ public function bar($numericS, $nonE, $literal, $edgeUnion) {
3557
assertType("non-empty-string", lcfirst($nonE));
3658
assertType("non-empty-string", ucfirst($nonE));
3759
assertType("non-empty-string", ucwords($nonE));
60+
assertType("non-empty-string", mb_convert_case($nonE, MB_CASE_UPPER));
61+
assertType("non-empty-string", mb_convert_case($nonE, MB_CASE_LOWER));
62+
assertType("non-empty-string", mb_convert_case($nonE, $mixed));
63+
assertType("non-empty-string", mb_convert_kana($nonE));
64+
assertType("non-empty-string", mb_convert_kana($nonE, $mixed));
3865

3966
assertType("string", strtolower($literal));
4067
assertType("string", strtoupper($literal));
@@ -43,18 +70,21 @@ public function bar($numericS, $nonE, $literal, $edgeUnion) {
4370
assertType("string", lcfirst($literal));
4471
assertType("string", ucfirst($literal));
4572
assertType("string", ucwords($literal));
73+
assertType("string", mb_convert_case($literal, MB_CASE_UPPER));
74+
assertType("string", mb_convert_case($literal, MB_CASE_LOWER));
75+
assertType("string", mb_convert_case($literal, $mixed));
76+
assertType("string", mb_convert_kana($literal));
77+
assertType("string", mb_convert_kana($literal, $mixed));
4678

4779
assertType("'foo'", lcfirst($edgeUnion));
4880
}
4981

5082
public function foo() {
51-
// calls with a 2nd arg could be more precise, but there was no use-case yet to support it
52-
assertType("non-falsy-string", mb_strtolower('ABC', 'UTF-8'));
53-
assertType("non-falsy-string", mb_strtoupper('abc', 'UTF-8'));
54-
assertType("non-falsy-string", ucwords('hello|world!', "|"));
55-
5683
// invalid char conversions still lead to non-falsy-string
5784
assertType("non-falsy-string", mb_strtolower("\xfe\xff\x65\xe5\x67\x2c\x8a\x9e", 'CP1252'));
58-
85+
// valid char sequence, but encodings other than ASCII / UTF-8 are not supported
86+
assertType("non-falsy-string", mb_convert_kana("\x95\x5c\x8c\xbb", 'SJIS-win'));
87+
// invalid UTF-8 sequence
88+
assertType("non-falsy-string", mb_convert_kana("\x95\x5c\x8c\xbb", 'UTF-8'));
5989
}
6090
}

0 commit comments

Comments
 (0)