Skip to content

Commit

Permalink
Extension symbols: track separately
Browse files Browse the repository at this point in the history
  • Loading branch information
janedbal committed Oct 15, 2024
1 parent 7ca4789 commit 491d3e9
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 52 deletions.
1 change: 1 addition & 0 deletions src/Analyser.php
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,7 @@ private function initExistingSymbols(): void

/** @var array<string, array<string, mixed>> $definedConstants */
$definedConstants = get_defined_constants(true);

foreach ($definedConstants as $constantExtension => $constants) {
foreach ($constants as $constantName => $_) {
if ($constantExtension === 'user') {
Expand Down
162 changes: 116 additions & 46 deletions src/UsedSymbolExtractor.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@

namespace ShipMonk\ComposerDependencyAnalyser;

use function array_combine;
use function array_fill_keys;
use function array_map;
use function array_merge;
use function count;
use function explode;
use function is_array;
use function ltrim;
use function strlen;
use function strpos;
use function strtolower;
use function substr;
use function token_get_all;
use const PHP_VERSION_ID;
Expand All @@ -22,14 +23,18 @@
use const T_CURLY_OPEN;
use const T_DOC_COMMENT;
use const T_DOLLAR_OPEN_CURLY_BRACES;
use const T_DOUBLE_COLON;
use const T_ENUM;
use const T_FUNCTION;
use const T_INSTEADOF;
use const T_INTERFACE;
use const T_NAME_FULLY_QUALIFIED;
use const T_NAME_QUALIFIED;
use const T_NAMESPACE;
use const T_NEW;
use const T_NS_SEPARATOR;
use const T_NULLSAFE_OBJECT_OPERATOR;
use const T_OBJECT_OPERATOR;
use const T_STRING;
use const T_TRAIT;
use const T_USE;
Expand Down Expand Up @@ -65,10 +70,9 @@ public function __construct(string $code)
* It does not produce any local names in current namespace
* - this results in very limited functionality in files without namespace
*
* @param array<string> $extClasses
* @param array<string> $extFunctions
* @param array<string> $extConstants
*
* @param list<string> $extClasses
* @param list<string> $extFunctions
* @param list<string> $extConstants
* @return array<SymbolKind::*, array<string, list<int>>>
* @license Inspired by https://github.com/doctrine/annotations/blob/2.0.0/lib/Doctrine/Common/Annotations/TokenParser.php
*/
Expand All @@ -79,16 +83,13 @@ public function parseUsedSymbols(
): array
{
$usedSymbols = [];
$useStatements = $initialUseStatements = array_merge(
array_combine($extClasses, $extClasses),
array_combine($extFunctions, $extFunctions),
array_combine($extConstants, $extConstants)
);
$useStatementKinds = $initialUseStatementKinds = array_merge(
array_fill_keys($extClasses, SymbolKind::CLASSLIKE),
array_fill_keys($extFunctions, SymbolKind::FUNCTION),
array_fill_keys($extConstants, SymbolKind::CONSTANT)
$extensionSymbols = array_merge(
array_fill_keys(array_map('strtolower', $extClasses), SymbolKind::CLASSLIKE),
array_fill_keys(array_map('strtolower', $extFunctions), SymbolKind::FUNCTION),
array_fill_keys(array_map('strtolower', $extConstants), SymbolKind::CONSTANT)
);
$useStatements = [];
$useStatementKinds = [];

$level = 0; // {, }, {$, ${
$squareLevel = 0; // [, ], #[
Expand Down Expand Up @@ -128,13 +129,14 @@ public function parseUsedSymbols(
case PHP_VERSION_ID >= 80000 ? T_NAMESPACE : -1:
// namespace change
$inGlobalScope = false;
$useStatements = $initialUseStatements;
$useStatementKinds = $initialUseStatementKinds;
$useStatements = [];
$useStatementKinds = [];
break;

case PHP_VERSION_ID >= 80000 ? T_NAME_FULLY_QUALIFIED : -1:
$symbolName = $this->normalizeBackslash($token[1]);
$kind = $this->getFqnSymbolKind($this->pointer - 2, $this->pointer, $inAttributeSquareLevel !== null);
$lowerSymbolName = strtolower($symbolName);
$kind = $extensionSymbols[$lowerSymbolName] ?? $this->getFqnSymbolKind($this->pointer - 2, $this->pointer, $inAttributeSquareLevel !== null);
$usedSymbols[$kind][$symbolName][] = $token[2];
break;

Expand All @@ -143,21 +145,34 @@ public function parseUsedSymbols(

if (isset($useStatements[$neededAlias])) {
$symbolName = $useStatements[$neededAlias] . substr($token[1], strlen($neededAlias));
$kind = $this->getFqnSymbolKind($this->pointer - 2, $this->pointer, $inAttributeSquareLevel !== null);
$usedSymbols[$kind][$symbolName][] = $token[2];

} elseif ($inGlobalScope) {
$symbolName = $token[1];
$kind = $this->getFqnSymbolKind($this->pointer - 2, $this->pointer, $inAttributeSquareLevel !== null);
$usedSymbols[$kind][$symbolName][] = $token[2];
} else {
break;
}

$lowerSymbolName = strtolower($symbolName);
$kind = $extensionSymbols[$lowerSymbolName] ?? $this->getFqnSymbolKind($this->pointer - 2, $this->pointer, $inAttributeSquareLevel !== null);
$usedSymbols[$kind][$symbolName][] = $token[2];

break;

case PHP_VERSION_ID >= 80000 ? T_STRING : -1:
$name = $token[1];
$lowerName = strtolower($name);
$pointerBeforeName = $this->pointer - 2;
$pointerAfterName = $this->pointer;

if (!$this->canBeSymbolName($pointerBeforeName, $pointerAfterName)) {
break;
}

if (isset($useStatements[$name])) {
if (isset($extensionSymbols[$lowerName])) {
$symbolName = $name;
$kind = $extensionSymbols[$lowerName];
$usedSymbols[$kind][$symbolName][] = $token[2];

} elseif (isset($useStatements[$name])) {
$symbolName = $useStatements[$name];
$kind = $useStatementKinds[$name];
$usedSymbols[$kind][$symbolName][] = $token[2];
Expand All @@ -172,18 +187,19 @@ public function parseUsedSymbols(
if (substr($nextName, 0, 1) !== '\\') { // not a namespace-relative name, but a new namespace declaration
// namespace change
$inGlobalScope = false;
$useStatements = $initialUseStatements;
$useStatementKinds = $initialUseStatementKinds;
$useStatements = [];
$useStatementKinds = [];
}

break;

case PHP_VERSION_ID < 80000 ? T_NS_SEPARATOR : -1:
$pointerBeforeName = $this->pointer - 2;
$symbolName = $this->normalizeBackslash($this->parseNameForOldPhp());
$lowerSymbolName = strtolower($symbolName);

if ($symbolName !== '') { // e.g. \array (NS separator followed by not-a-name)
$kind = $this->getFqnSymbolKind($pointerBeforeName, $this->pointer - 1, false);
$kind = $extensionSymbols[$lowerSymbolName] ?? $this->getFqnSymbolKind($pointerBeforeName, $this->pointer - 1, false);
$usedSymbols[$kind][$symbolName][] = $token[2];
}

Expand All @@ -192,23 +208,34 @@ public function parseUsedSymbols(
case PHP_VERSION_ID < 80000 ? T_STRING : -1:
$pointerBeforeName = $this->pointer - 2;
$name = $this->parseNameForOldPhp();
$lowerName = strtolower($name);
$pointerAfterName = $this->pointer - 1;

if (!$this->canBeSymbolName($pointerBeforeName, $pointerAfterName)) {
break;
}

if (isset($useStatements[$name])) { // unqualified name
$symbolName = $useStatements[$name];
$kind = $useStatementKinds[$name];
$usedSymbols[$kind][$symbolName][] = $token[2];

} elseif (isset($extensionSymbols[$lowerName])) {
$symbolName = $name;
$kind = $extensionSymbols[$lowerName];
$usedSymbols[$kind][$symbolName][] = $token[2];

} else {
[$neededAlias] = explode('\\', $name, 2);

if (isset($useStatements[$neededAlias])) { // qualified name
$symbolName = $useStatements[$neededAlias] . substr($name, strlen($neededAlias));
$kind = $this->getFqnSymbolKind($pointerBeforeName, $this->pointer - 1, false);
$kind = $this->getFqnSymbolKind($pointerBeforeName, $pointerAfterName, false);
$usedSymbols[$kind][$symbolName][] = $token[2];

} elseif ($inGlobalScope && strpos($name, '\\') !== false) {
$symbolName = $name;
$kind = $this->getFqnSymbolKind($pointerBeforeName, $this->pointer - 1, false);
$kind = $this->getFqnSymbolKind($pointerBeforeName, $pointerAfterName, false);
$usedSymbols[$kind][$symbolName][] = $token[2];
}
}
Expand Down Expand Up @@ -369,44 +396,87 @@ private function getFqnSymbolKind(
return SymbolKind::CLASSLIKE;
}

$tokenBeforeName = $this->getTokenBefore($pointerBeforeName);
$tokenAfterName = $this->getTokenAfter($pointerAfterName);

if (
$tokenAfterName === '('
&& $tokenBeforeName[0] !== T_NEW // eliminate new \ClassName(
) {
return SymbolKind::FUNCTION;
}

return SymbolKind::CLASSLIKE; // constant may fall here, this is eliminated later
}

/**
 * Decides whether a bare name can be a reference to a symbol (class-like, function or constant).
 *
 * The significant tokens around the name are obtained through getTokenBefore() and getTokenAfter().
 * The name is rejected when it is:
 * - preceded by `::`, `->` or `?->` (member access, not a standalone symbol)
 * - preceded by `function` (a declaration, not a usage)
 * - preceded or followed by `insteadof` / `as` (trait adaptation and aliasing)
 *
 * @param int $pointerBeforeName Token index from which the search for the preceding token starts
 * @param int $pointerAfterName Token index from which the search for the following token starts
 * @return bool False when the surrounding tokens rule out a symbol reference, true otherwise
 */
private function canBeSymbolName(
    int $pointerBeforeName,
    int $pointerAfterName
): bool
{
    $tokenBeforeName = $this->getTokenBefore($pointerBeforeName);
    $tokenAfterName = $this->getTokenAfter($pointerAfterName);

    if (
        $tokenBeforeName[0] === T_DOUBLE_COLON
        || $tokenBeforeName[0] === T_INSTEADOF
        || $tokenBeforeName[0] === T_AS
        || $tokenBeforeName[0] === T_FUNCTION
        || $tokenBeforeName[0] === T_OBJECT_OPERATOR
        // T_NULLSAFE_OBJECT_OPERATOR exists since PHP 8.0.0 (PHP_VERSION_ID 80000), so the gate must be inclusive;
        // -1 never matches a token id on older versions
        || $tokenBeforeName[0] === (PHP_VERSION_ID >= 80000 ? T_NULLSAFE_OBJECT_OPERATOR : -1)
        || $tokenAfterName[0] === T_INSTEADOF
        || $tokenAfterName[0] === T_AS
    ) {
        return false;
    }

    return true;
}

/**
* @return array{int, string}|string
*/
private function getTokenBefore(int $pointer)
{
do {
$tokenBeforeName = $this->tokens[$pointerBeforeName];
$token = $this->tokens[$pointer];

if (!is_array($tokenBeforeName)) {
if (!is_array($token)) {
break;
}

if ($tokenBeforeName[0] === T_WHITESPACE || $tokenBeforeName[0] === T_COMMENT || $tokenBeforeName[0] === T_DOC_COMMENT) {
$pointerBeforeName--;
if ($token[0] === T_WHITESPACE || $token[0] === T_COMMENT || $token[0] === T_DOC_COMMENT) {
$pointer--;
continue;
}

break;
} while ($pointerBeforeName >= 0);
} while ($pointer >= 0);

return $token;
}

/**
* @return array{int, string}|string
*/
private function getTokenAfter(int $pointer)
{
do {
$tokenAfterName = $this->tokens[$pointerAfterName];
$token = $this->tokens[$pointer];

if (!is_array($tokenAfterName)) {
if (!is_array($token)) {
break;
}

if ($tokenAfterName[0] === T_WHITESPACE || $tokenAfterName[0] === T_COMMENT || $tokenAfterName[0] === T_DOC_COMMENT) {
$pointerAfterName++;
if ($token[0] === T_WHITESPACE || $token[0] === T_COMMENT || $token[0] === T_DOC_COMMENT) {
$pointer++;
continue;
}

break;
} while ($pointerAfterName < $this->numTokens);
} while ($pointer < $this->numTokens);

if (
$tokenAfterName === '('
&& $tokenBeforeName[0] !== T_NEW // eliminate new \ClassName(
) {
return SymbolKind::FUNCTION;
}

return SymbolKind::CLASSLIKE; // constant may fall here, this is eliminated later
return $token;
}

}
50 changes: 44 additions & 6 deletions tests/UsedSymbolExtractorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace ShipMonk\ComposerDependencyAnalyser;

use PHPUnit\Framework\TestCase;
use function array_map;
use function file_get_contents;
use const PHP_VERSION_ID;

Expand All @@ -20,7 +21,14 @@ public function test(string $path, array $expectedUsages): void

$extractor = new UsedSymbolExtractor($code);

self::assertSame($expectedUsages, $extractor->parseUsedSymbols(['PDO'], ['json_encode'], ['LIBXML_ERR_FATAL']));
self::assertSame(
$expectedUsages,
$extractor->parseUsedSymbols(
['PDO'],
array_map('strtolower', ['json_encode', 'DDTrace\active_span', 'DDTrace\root_span']),
['LIBXML_ERR_FATAL', 'LIBXML_ERR_ERROR', 'DDTrace\DBM_PROPAGATION_FULL']
)
);
}

/**
Expand Down Expand Up @@ -49,6 +57,11 @@ public function provideVariants(): iterable
],
];

yield 'T_STRING issues' => [
__DIR__ . '/data/not-autoloaded/used-symbols/t-string-issues.php',
[],
];

yield 'various usages' => [
__DIR__ . '/data/not-autoloaded/used-symbols/various-usages.php',
[
Expand Down Expand Up @@ -122,15 +135,40 @@ public function provideVariants(): iterable
__DIR__ . '/data/not-autoloaded/used-symbols/extensions.php',
[
SymbolKind::FUNCTION => [
'json_encode' => [5],
'json_decode' => [12],
'json_encode' => [8],
'DDTrace\active_span' => [12],
'DDTrace\root_span' => [13],
'json_decode' => [21],
],
SymbolKind::CONSTANT => [
'LIBXML_ERR_FATAL' => [9],
'LIBXML_ERR_ERROR' => [10],
'DDTrace\DBM_PROPAGATION_FULL' => [14],
],
SymbolKind::CLASSLIKE => [
'PDO' => [11],
'CURLOPT_SSL_VERIFYHOST' => [19],
],
],
];

yield 'extensions global' => [
__DIR__ . '/data/not-autoloaded/used-symbols/extensions-global.php',
[
SymbolKind::FUNCTION => [
'json_encode' => [8],
'DDTrace\active_span' => [12],
'DDTrace\root_span' => [13],
'json_decode' => [21],
],
SymbolKind::CONSTANT => [
'LIBXML_ERR_FATAL' => [6],
'LIBXML_ERR_FATAL' => [9],
'LIBXML_ERR_ERROR' => [10],
'DDTrace\DBM_PROPAGATION_FULL' => [14],
],
SymbolKind::CLASSLIKE => [
'PDO' => [7],
'CURLOPT_SSL_VERIFYHOST' => [10],
'PDO' => [11],
'CURLOPT_SSL_VERIFYHOST' => [19],
],
],
];
Expand Down
Loading

0 comments on commit 491d3e9

Please sign in to comment.