From 35d2d09c398e17c87ed93810b675a2efb92d4db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Wed, 13 May 2026 23:44:33 +0200 Subject: [PATCH 1/2] Two micro-optimizations: cache active block parser, ASCII fast path in isLetter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cache the active block parser to avoid calling end() on the parsers array on every getActiveBlockParser() call. end() costs ~67ns; a direct property read costs ~28ns. getActiveBlockParser() is called ~5x per line so this compounds quickly. The cache is kept in sync in activateBlockParser() and deactivateBlockParser(). Add an ASCII fast path to RegexHelper::isLetter(). The previous implementation called preg_match('/[\pL]/u', $char) on every non-blank, non-indented line to detect whether to skip block-start parsing. For single-byte ASCII characters (the vast majority in Markdown), a direct range comparison is ~60% faster than the regex. Micro-benchmarks (PHP 8.5.2, OPcache on, Xdebug off): end($parsers) 67 ns → property read 28 ns (-58%) preg_match Unicode 45 ns → char range 18 ns (-60%) --- src/Parser/MarkdownParser.php | 16 ++++++++++------ src/Util/RegexHelper.php | 7 ++++++- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Parser/MarkdownParser.php b/src/Parser/MarkdownParser.php index 904c7c45b4..75d384d0fd 100644 --- a/src/Parser/MarkdownParser.php +++ b/src/Parser/MarkdownParser.php @@ -53,6 +53,9 @@ final class MarkdownParser implements MarkdownParserInterface /** @psalm-readonly-allow-private-mutation */ private Cursor $cursor; + /** @psalm-readonly-allow-private-mutation */ + private BlockContinueParserInterface $activeBlockParser; + /** * @var array * @@ -297,6 +300,7 @@ private function addChild(BlockContinueParserInterface $blockParser, ?int $start private function activateBlockParser(BlockContinueParserInterface $blockParser): void { $this->activeBlockParsers[] = $blockParser; + $this->activeBlockParser = $blockParser; } /** @@ -309,6 +313,11 @@ private function deactivateBlockParser(): BlockContinueParserInterface throw new ParserLogicException('The last block parser should not be deactivated'); } + $last = \end($this->activeBlockParsers); + if ($last !== false) { + $this->activeBlockParser = $last; + } + return $popped; } @@ -346,11 +355,6 @@ private function updateReferenceMap(iterable $references): void */ public function getActiveBlockParser(): BlockContinueParserInterface { - $active = \end($this->activeBlockParsers); - if ($active === false) { - throw new ParserLogicException('No active block parsers are available'); - } - - return $active; + return $this->activeBlockParser; } } diff --git a/src/Util/RegexHelper.php b/src/Util/RegexHelper.php index 429b2d85f0..38ac8b441d 100644 --- a/src/Util/RegexHelper.php +++ b/src/Util/RegexHelper.php @@ -98,7 +98,12 @@ public static function isLetter(?string $character): bool return false; } - return \preg_match('/[\pL]/u', $character) === 1; + // Fast path for ASCII letters (the common case in Markdown) + if (\strlen($character) === 1) { + return ($character >= 'a' && $character <= 'z') || ($character >= 'A' && $character <= 'Z'); + } + + return \preg_match('/^\pL/u', $character) === 1; } /** From 9d945ee15bd5e974f5a0ee1a78580b1d711ebab7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Tamarelle?= Date: Thu, 14 May 2026 00:05:01 +0200 Subject: [PATCH 2/2] Use ctype_alpha for ASCII fast path in RegexHelper::isLetter() --- src/Util/RegexHelper.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Util/RegexHelper.php b/src/Util/RegexHelper.php index 38ac8b441d..e311ccdfcc 100644 --- a/src/Util/RegexHelper.php +++ b/src/Util/RegexHelper.php @@ -98,9 +98,9 @@ public static function isLetter(?string $character): bool return false; } - // Fast path for ASCII letters (the common case in Markdown) + // Fast path for ASCII (the common case in Markdown); ctype_alpha is locale-independent for single bytes if (\strlen($character) === 1) { - return ($character >= 'a' && $character <= 'z') || ($character >= 'A' && $character <= 'Z'); + return \ctype_alpha($character); } return \preg_match('/^\pL/u', $character) === 1;