Skip to content

Commit b61bbd4

Browse files
committed
Use delimiter position to optimize processing
Delimiters may be deleted, so we store delimiter positions instead of pointers. This also allows us to optimize searches within the stack, avoiding quadratic behavior when parsing emphasis. See github/cmark-gfm@75008f1
1 parent e7584cf commit b61bbd4

12 files changed

+192
-51
lines changed

CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi
1818
- `[` and `]` are no longer added as `Delimiter` objects on the stack; a new `Bracket` type with its own stack is used instead
1919
- `UrlAutolinkParser` no longer parses URLs with more than 127 subdomains
2020
- Expanded reference links can no longer exceed 100kb, or the size of the input document (whichever is greater)
21+
- Delimiters should always provide a non-null value via `DelimiterInterface::getIndex()`
22+
- We'll attempt to infer the index based on surrounding delimiters where possible
23+
- The `DelimiterStack` now accepts integer positions for any `$stackBottom` argument
2124
- Several small performance optimizations
2225

2326
## [2.5.3] - 2024-08-16
@@ -95,14 +98,16 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi
9598

9699
- Returning dynamic values from `DelimiterProcessorInterface::getDelimiterUse()` is deprecated
97100
- You should instead implement `CacheableDelimiterProcessorInterface` to help the engine perform caching to avoid performance issues.
101+
- Failing to set a delimiter's index (or returning `null` from `DelimiterInterface::getIndex()`) is deprecated and will not be supported in 3.0
98102
- Deprecated `DelimiterInterface::isActive()` and `DelimiterInterface::setActive()`, as these are no longer used by the engine
99103
- Deprecated `DelimiterStack::removeEarlierMatches()` and `DelimiterStack::searchByCharacter()`, as these are no longer used by the engine
104+
- Passing a `DelimiterInterface` as the `$stackBottom` argument to `DelimiterStack::processDelimiters()` or `::removeAll()` is deprecated and will not be supported in 3.0; pass the integer position instead.
100105

101106
### Fixed
102107

103108
- Fixed NUL characters not being replaced in the input
104109
- Fixed quadratic complexity parsing unclosed inline links
105-
- Fixed quadratic complexity finding the bottom opener for emphasis and strikethrough delimiters
110+
- Fixed quadratic complexity parsing emphasis and strikethrough delimiters
106111
- Fixed issue where having 500,000+ delimiters could trigger a [known segmentation fault issue in PHP's garbage collection](https://bugs.php.net/bug.php?id=68606)
107112
- Fixed quadratic complexity deactivating link openers
108113
- Fixed catastrophic backtracking when parsing link labels/titles

phpstan.neon.dist

+2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ parameters:
77
message: '#Parameter .+ of class .+Reference constructor expects string, string\|null given#'
88
- path: src/Util/RegexHelper.php
99
message: '#Method .+RegexHelper::unescape\(\) should return string but returns string\|null#'
10+
- path: src/Delimiter/DelimiterStack.php
11+
message: '#unknown class WeakMap#'
1012
exceptions:
1113
uncheckedExceptionClasses:
1214
# Exceptions caused by bad developer logic that should always bubble up:

src/Delimiter/Bracket.php

+8-15
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,17 @@ final class Bracket
1919
{
2020
private Node $node;
2121
private ?Bracket $previous;
22-
private ?DelimiterInterface $previousDelimiter;
2322
private bool $hasNext = false;
24-
private int $index;
23+
private int $position;
2524
private bool $image;
2625
private bool $active = true;
2726

28-
public function __construct(Node $node, ?Bracket $previous, ?DelimiterInterface $previousDelimiter, int $index, bool $image)
27+
public function __construct(Node $node, ?Bracket $previous, int $position, bool $image)
2928
{
30-
$this->node = $node;
31-
$this->previous = $previous;
32-
$this->previousDelimiter = $previousDelimiter;
33-
$this->index = $index;
34-
$this->image = $image;
29+
$this->node = $node;
30+
$this->previous = $previous;
31+
$this->position = $position;
32+
$this->image = $image;
3533
}
3634

3735
public function getNode(): Node
@@ -44,19 +42,14 @@ public function getPrevious(): ?Bracket
4442
return $this->previous;
4543
}
4644

47-
public function getPreviousDelimiter(): ?DelimiterInterface
48-
{
49-
return $this->previousDelimiter;
50-
}
51-
5245
public function hasNext(): bool
5346
{
5447
return $this->hasNext;
5548
}
5649

57-
public function getIndex(): int
50+
public function getPosition(): int
5851
{
59-
return $this->index;
52+
return $this->position;
6053
}
6154

6255
public function isImage(): bool

src/Delimiter/DelimiterStack.php

+114-15
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
use League\CommonMark\Delimiter\Processor\CacheableDelimiterProcessorInterface;
2323
use League\CommonMark\Delimiter\Processor\DelimiterProcessorCollection;
24+
use League\CommonMark\Exception\LogicException;
2425
use League\CommonMark\Node\Inline\AdjacentTextMerger;
2526
use League\CommonMark\Node\Node;
2627

@@ -32,6 +33,23 @@ final class DelimiterStack
3233
/** @psalm-readonly-allow-private-mutation */
3334
private ?Bracket $brackets = null;
3435

36+
/**
37+
* @deprecated This property will be removed in 3.0 once all delimiters MUST have an index/position
38+
*
39+
* @var \SplObjectStorage<DelimiterInterface, int>|\WeakMap<DelimiterInterface, int>
40+
*/
41+
private $missingIndexCache;
42+
43+
public function __construct()
44+
{
45+
if (\PHP_VERSION_ID >= 80000) {
46+
/** @psalm-suppress PropertyTypeCoercion */
47+
$this->missingIndexCache = new \WeakMap(); // @phpstan-ignore-line
48+
} else {
49+
$this->missingIndexCache = new \SplObjectStorage(); // @phpstan-ignore-line
50+
}
51+
}
52+
3553
public function push(DelimiterInterface $newDelimiter): void
3654
{
3755
$newDelimiter->setPrevious($this->top);
@@ -52,7 +70,7 @@ public function addBracket(Node $node, int $index, bool $image): void
5270
$this->brackets->setHasNext(true);
5371
}
5472

55-
$this->brackets = new Bracket($node, $this->brackets, $this->top, $index, $image);
73+
$this->brackets = new Bracket($node, $this->brackets, $index, $image);
5674
}
5775

5876
/**
@@ -63,14 +81,21 @@ public function getLastBracket(): ?Bracket
6381
return $this->brackets;
6482
}
6583

66-
private function findEarliest(?DelimiterInterface $stackBottom = null): ?DelimiterInterface
84+
/**
85+
* @throws LogicException
86+
*/
87+
private function findEarliest(int $stackBottom): ?DelimiterInterface
6788
{
68-
$delimiter = $this->top;
69-
while ($delimiter !== null && $delimiter->getPrevious() !== $stackBottom) {
70-
$delimiter = $delimiter->getPrevious();
89+
// Walk back from the top of the stack to the earliest delimiter positioned above $stackBottom.
90+
$delimiter = $this->top;
91+
$lastChecked = null;
92+
93+
while ($delimiter !== null && self::getIndex($delimiter) > $stackBottom) {
94+
$lastChecked = $delimiter;
95+
$delimiter = $delimiter->getPrevious();
7196
}
7297

73-
return $delimiter;
98+
return $lastChecked;
7499
}
75100

76101
/**
@@ -113,6 +138,9 @@ public function removeDelimiter(DelimiterInterface $delimiter): void
113138
// segfaults like in https://bugs.php.net/bug.php?id=68606.
114139
$delimiter->setPrevious(null);
115140
$delimiter->setNext(null);
141+
142+
// TODO: Remove the line below once PHP 7.4 support is dropped, as WeakMap won't hold onto the reference, making this unnecessary
143+
unset($this->missingIndexCache[$delimiter]);
116144
}
117145

118146
private function removeDelimiterAndNode(DelimiterInterface $delimiter): void
@@ -121,19 +149,30 @@ private function removeDelimiterAndNode(DelimiterInterface $delimiter): void
121149
$this->removeDelimiter($delimiter);
122150
}
123151

152+
/**
153+
* @throws LogicException
154+
*/
124155
private function removeDelimitersBetween(DelimiterInterface $opener, DelimiterInterface $closer): void
125156
{
126-
$delimiter = $closer->getPrevious();
127-
while ($delimiter !== null && $delimiter !== $opener) {
157+
$delimiter = $closer->getPrevious();
158+
$openerPosition = self::getIndex($opener);
159+
while ($delimiter !== null && self::getIndex($delimiter) > $openerPosition) {
128160
$previous = $delimiter->getPrevious();
129161
$this->removeDelimiter($delimiter);
130162
$delimiter = $previous;
131163
}
132164
}
133165

134-
public function removeAll(?DelimiterInterface $stackBottom = null): void
166+
/**
167+
* @param DelimiterInterface|int|null $stackBottom
168+
*
169+
* @throws LogicException if the index/position cannot be determined for some delimiter
170+
*/
171+
public function removeAll($stackBottom = null): void
135172
{
136-
while ($this->top && $this->top !== $stackBottom) {
173+
$stackBottomPosition = \is_int($stackBottom) ? $stackBottom : self::getIndex($stackBottom);
174+
175+
while ($this->top && $this->getIndex($this->top) > $stackBottomPosition) {
137176
$this->removeDelimiter($this->top);
138177
}
139178
}
@@ -188,12 +227,22 @@ public function searchByCharacter($characters): ?DelimiterInterface
188227
return $opener;
189228
}
190229

191-
public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterProcessorCollection $processors): void
230+
/**
231+
* @param DelimiterInterface|int|null $stackBottom
232+
*
233+
* @throws LogicException if the index/position cannot be determined for any delimiter
234+
*
235+
* @todo change $stackBottom to an int in 3.0
236+
*/
237+
public function processDelimiters($stackBottom, DelimiterProcessorCollection $processors): void
192238
{
239+
/** @var array<string, int> $openersBottom */
193240
$openersBottom = [];
194241

242+
$stackBottomPosition = \is_int($stackBottom) ? $stackBottom : self::getIndex($stackBottom);
243+
195244
// Find first closer above stackBottom
196-
$closer = $this->findEarliest($stackBottom);
245+
$closer = $this->findEarliest($stackBottomPosition);
197246

198247
// Move forward, looking for closers, and handling each
199248
while ($closer !== null) {
@@ -217,7 +266,7 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro
217266
$openerFound = false;
218267
$potentialOpenerFound = false;
219268
$opener = $closer->getPrevious();
220-
while ($opener !== null && $opener !== $stackBottom && $opener !== ($openersBottom[$openersBottomCacheKey] ?? null)) {
269+
while ($opener !== null && ($openerPosition = self::getIndex($opener)) > $stackBottomPosition && $openerPosition >= ($openersBottom[$openersBottomCacheKey] ?? 0)) {
221270
if ($opener->canOpen() && $opener->getChar() === $openingDelimiterChar) {
222271
$potentialOpenerFound = true;
223272
$useDelims = $delimiterProcessor->getDelimiterUse($opener, $closer);
@@ -234,7 +283,7 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro
234283
// Set lower bound for future searches
235284
// TODO: Remove this conditional check in 3.0. It only exists to prevent behavioral BC breaks in 2.x.
236285
if ($potentialOpenerFound === false || $delimiterProcessor instanceof CacheableDelimiterProcessorInterface) {
237-
$openersBottom[$openersBottomCacheKey] = $closer->getPrevious();
286+
$openersBottom[$openersBottomCacheKey] = self::getIndex($closer);
238287
}
239288

240289
if (! $potentialOpenerFound && ! $closer->canOpen()) {
@@ -282,7 +331,7 @@ public function processDelimiters(?DelimiterInterface $stackBottom, DelimiterPro
282331
}
283332

284333
// Remove all delimiters
285-
$this->removeAll($stackBottom);
334+
$this->removeAll($stackBottomPosition);
286335
}
287336

288337
/**
@@ -298,4 +347,54 @@ public function __destruct()
298347
$this->removeBracket();
299348
}
300349
}
350+
351+
/**
352+
* @deprecated This method will be dropped in 3.0 once all delimiters MUST have an index/position
353+
*
354+
* @throws LogicException if no index was defined on this delimiter, and no reasonable guess could be made based on its neighbors
355+
*/
356+
private function getIndex(?DelimiterInterface $delimiter): int
357+
{
358+
if ($delimiter === null) {
359+
return -1;
360+
}
361+
362+
if (($index = $delimiter->getIndex()) !== null) {
363+
return $index;
364+
}
365+
366+
if (isset($this->missingIndexCache[$delimiter])) {
367+
return $this->missingIndexCache[$delimiter];
368+
}
369+
370+
$prev = $delimiter->getPrevious();
371+
$next = $delimiter->getNext();
372+
373+
$i = 0;
374+
do {
375+
$i++;
376+
if ($prev === null) {
377+
break;
378+
}
379+
380+
if ($prev->getIndex() !== null) {
381+
return $this->missingIndexCache[$delimiter] = $prev->getIndex() + $i;
382+
}
383+
} while ($prev = $prev->getPrevious());
384+
385+
$j = 0;
386+
do {
387+
$j++;
388+
if ($next === null) {
389+
break;
390+
}
391+
392+
if ($next->getIndex() !== null) {
393+
return $this->missingIndexCache[$delimiter] = $next->getIndex() - $j;
394+
}
395+
} while ($next = $next->getNext());
396+
397+
// No index was defined on this delimiter, and none could be inferred from its neighbors on the stack.
398+
throw new LogicException('No index was defined on this delimiter, and none could be guessed based on the stack. Ensure you are passing the index when instantiating the Delimiter.');
399+
}
301400
}

src/Extension/CommonMark/Parser/Inline/CloseBracketParser.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ public function parse(InlineParserContext $inlineContext): bool
103103

104104
// Process delimiters such as emphasis inside link/image
105105
$delimiterStack = $inlineContext->getDelimiterStack();
106-
$stackBottom = $opener->getPreviousDelimiter();
106+
$stackBottom = $opener->getPosition();
107107
$delimiterStack->processDelimiters($stackBottom, $this->environment->getDelimiterProcessors());
108108
$delimiterStack->removeBracket();
109109
$delimiterStack->removeAll($stackBottom);
@@ -179,7 +179,7 @@ private function tryParseReference(Cursor $cursor, ReferenceMapInterface $refere
179179
} elseif (! $opener->hasNext()) {
180180
// Empty or missing second label means to use the first label as the reference.
181181
// The reference must not contain a bracket. If we know there's a bracket, we don't even bother checking it.
182-
$start = $opener->getIndex();
182+
$start = $opener->getPosition();
183183
$length = $startPos - $start;
184184
} else {
185185
$cursor->restoreState($savePos);

src/Extension/SmartPunct/QuoteParser.php

+2-1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ public function parse(InlineParserContext $inlineContext): bool
4646
{
4747
$char = $inlineContext->getFullMatch();
4848
$cursor = $inlineContext->getCursor();
49+
$index = $cursor->getPosition();
4950

5051
$charBefore = $cursor->peek(-1);
5152
if ($charBefore === null) {
@@ -67,7 +68,7 @@ public function parse(InlineParserContext $inlineContext): bool
6768
$inlineContext->getContainer()->appendChild($node);
6869

6970
// Add entry to stack for this opener
70-
$inlineContext->getDelimiterStack()->push(new Delimiter($char, 1, $node, $canOpen, $canClose));
71+
$inlineContext->getDelimiterStack()->push(new Delimiter($char, 1, $node, $canOpen, $canClose, $index));
7172

7273
return true;
7374
}

src/Extension/Table/TableParser.php

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
namespace League\CommonMark\Extension\Table;
1717

18+
use League\CommonMark\Exception\LogicException;
1819
use League\CommonMark\Parser\Block\AbstractBlockContinueParser;
1920
use League\CommonMark\Parser\Block\BlockContinue;
2021
use League\CommonMark\Parser\Block\BlockContinueParserInterface;
@@ -150,6 +151,9 @@ public function parseInlines(InlineParserEngineInterface $inlineParser): void
150151
}
151152
}
152153

154+
/**
155+
* @throws LogicException
156+
*/
153157
private function parseCell(string $cell, int $column, InlineParserEngineInterface $inlineParser): TableCell
154158
{
155159
$tableCell = new TableCell(TableCell::TYPE_DATA, $this->columns[$column] ?? null);

src/Parser/Block/BlockContinueParserWithInlinesInterface.php

+3
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,15 @@
1313

1414
namespace League\CommonMark\Parser\Block;
1515

16+
use League\CommonMark\Exception\LogicException;
1617
use League\CommonMark\Parser\InlineParserEngineInterface;
1718

1819
interface BlockContinueParserWithInlinesInterface extends BlockContinueParserInterface
1920
{
2021
/**
2122
* Parse any inlines inside of the current block
23+
*
24+
* @throws LogicException
2225
*/
2326
public function parseInlines(InlineParserEngineInterface $inlineParser): void;
2427
}

src/Parser/Inline/InlineParserInterface.php

+4
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,15 @@
1313

1414
namespace League\CommonMark\Parser\Inline;
1515

16+
use League\CommonMark\Exception\LogicException;
1617
use League\CommonMark\Parser\InlineParserContext;
1718

1819
interface InlineParserInterface
1920
{
2021
public function getMatchDefinition(): InlineParserMatch;
2122

23+
/**
24+
* @throws LogicException
25+
*/
2226
public function parse(InlineParserContext $inlineContext): bool;
2327
}

0 commit comments

Comments
 (0)