1
1
// Licensed to the .NET Foundation under one or more agreements.
2
2
// The .NET Foundation licenses this file to you under the MIT license.
3
3
4
+ #if SYSTEM_TEXT_REGULAREXPRESSIONS
4
5
using System . Buffers ;
6
+ #endif
5
7
using System . Collections . Generic ;
6
8
using System . Diagnostics ;
7
9
@@ -10,28 +12,55 @@ namespace System.Text.RegularExpressions
10
12
/// <summary>Contains state and provides operations related to finding the next location a match could possibly begin.</summary>
11
13
internal sealed class RegexFindOptimizations
12
14
{
13
- /// <summary>True if the input should be processed right-to-left rather than left-to-right.</summary>
14
- private readonly bool _rightToLeft ;
15
15
/// <summary>Lookup table used for optimizing ASCII when doing set queries.</summary>
16
16
private readonly uint [ ] ? [ ] ? _asciiLookups ;
17
17
18
- public RegexFindOptimizations ( RegexNode root , RegexOptions options )
18
/// <summary>Creates the find optimizations to use for the pattern represented by <paramref name="root"/>.</summary>
/// <param name="root">The root of the pattern node tree.</param>
/// <param name="options">Options used when creating the regex.</param>
/// <returns>
/// The optimizations computed from the whole pattern, unless the options do not include RightToLeft, those
/// optimizations report that they are not useful, and the pattern has a leading positive lookahead. In that
/// case the result is computed from the lookahead's first child, treated as a leading partial of the pattern,
/// with its minimum and maximum lengths adjusted from the whole-pattern result.
/// </returns>
+ public static RegexFindOptimizations Create ( RegexNode root , RegexOptions options )
19
+ {
20
+ RegexFindOptimizations opts = new ( root , options , isLeadingPartial : false ) ;
21
+
22
// Only fall back to a leading positive lookahead for left-to-right patterns: the private constructor
// asserts that RightToLeft is never combined with isLeadingPartial.
+ if ( ( options & RegexOptions . RightToLeft ) == 0 &&
23
+ ! opts . IsUseful &&
24
+ RegexPrefixAnalyzer . FindLeadingPositiveLookahead ( root ) is RegexNode positiveLookahead )
25
+ {
26
+ RegexFindOptimizations positiveLookaheadOpts = new ( positiveLookahead . Child ( 0 ) , options , isLeadingPartial : true ) ;
27
+
28
+ // Fixups to incorporate relevant information from the original optimizations.
29
+ // - If the original has a larger minimum length than the lookahead, use it. Lookaheads don't currently factor into
30
+ // the computation of the minimum as it complicates the logic due to them possibly overlapping with other portions.
31
+ // - Use whatever max came from the original, if any. We shouldn't have computed a max for the lookahead because
32
+ // it's partial.
33
+ positiveLookaheadOpts . MinRequiredLength = Math . Max ( opts . MinRequiredLength , positiveLookaheadOpts . MinRequiredLength ) ;
34
+ positiveLookaheadOpts . MaxPossibleLength = opts . MaxPossibleLength ;
35
+
36
+ opts = positiveLookaheadOpts ;
37
+ }
38
+
39
+ return opts ;
40
+ }
41
+
42
+ /// <summary>Creates optimization information for searching with the pattern represented by <paramref name="root"/>.</summary>
43
+ /// <param name="root">The root of the pattern node tree.</param>
44
+ /// <param name="options">Options used when creating the regex.</param>
45
+ /// <param name="isLeadingPartial">true if <paramref name="root"/> may not represent the whole pattern, only a leading node in it.</param>
46
+ private RegexFindOptimizations ( RegexNode root , RegexOptions options , bool isLeadingPartial )
19
47
{
20
- _rightToLeft = ( options & RegexOptions . RightToLeft ) != 0 ;
48
+ bool rightToLeft = ( options & RegexOptions . RightToLeft ) != 0 ;
49
+ Debug . Assert ( ! isLeadingPartial || ! rightToLeft , "RightToLeft unexpected when isLeadingPartial" ) ;
21
50
22
51
MinRequiredLength = root . ComputeMinLength ( ) ;
23
52
24
53
// Compute any anchor starting the expression. If there is one, we won't need to search for anything,
25
54
// as we can just match at that single location.
26
55
LeadingAnchor = RegexPrefixAnalyzer . FindLeadingAnchor ( root ) ;
27
- if ( _rightToLeft && LeadingAnchor == RegexNodeKind . Bol )
56
+ if ( rightToLeft && LeadingAnchor == RegexNodeKind . Bol )
28
57
{
29
58
// Filter out Bol for RightToLeft, as we don't currently optimize for it.
30
59
LeadingAnchor = RegexNodeKind . Unknown ;
31
60
}
32
61
if ( LeadingAnchor is RegexNodeKind . Beginning or RegexNodeKind . Start or RegexNodeKind . EndZ or RegexNodeKind . End )
33
62
{
34
- FindMode = ( LeadingAnchor , _rightToLeft ) switch
63
+ FindMode = ( LeadingAnchor , rightToLeft ) switch
35
64
{
36
65
( RegexNodeKind . Beginning , false ) => FindNextStartingPositionMode . LeadingAnchor_LeftToRight_Beginning ,
37
66
( RegexNodeKind . Beginning , true ) => FindNextStartingPositionMode . LeadingAnchor_RightToLeft_Beginning ,
@@ -47,7 +76,8 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
47
76
48
77
// Compute any anchor trailing the expression. If there is one, and we can also compute a fixed length
49
78
// for the whole expression, we can use that to quickly jump to the right location in the input.
50
- if ( ! _rightToLeft ) // haven't added FindNextStartingPositionMode trailing anchor support for RTL
79
+ if ( ! rightToLeft && // haven't added FindNextStartingPositionMode trailing anchor support for RTL
80
+ ! isLeadingPartial ) // trailing anchors in a partial root aren't relevant
51
81
{
52
82
TrailingAnchor = RegexPrefixAnalyzer . FindTrailingAnchor ( root ) ;
53
83
if ( TrailingAnchor is RegexNodeKind . End or RegexNodeKind . EndZ &&
@@ -70,7 +100,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
70
100
if ( prefix . Length > 1 )
71
101
{
72
102
LeadingPrefix = prefix ;
73
- FindMode = _rightToLeft ?
103
+ FindMode = rightToLeft ?
74
104
FindNextStartingPositionMode . LeadingString_RightToLeft :
75
105
FindNextStartingPositionMode . LeadingString_LeftToRight ;
76
106
return ;
@@ -89,7 +119,7 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
89
119
// more expensive; someone who wants to pay to do more work can specify Compiled. So for the interpreter
90
120
// we focus only on creating a set for the first character. Same for right-to-left, which is used very
91
121
// rarely and thus we don't need to invest in special-casing it.
92
- if ( _rightToLeft )
122
+ if ( rightToLeft )
93
123
{
94
124
// Determine a set for anything that can possibly start the expression.
95
125
if ( RegexPrefixAnalyzer . FindFirstCharClass ( root ) is string charClass )
@@ -253,21 +283,21 @@ public RegexFindOptimizations(RegexNode root, RegexOptions options)
253
283
public FindNextStartingPositionMode FindMode { get ; } = FindNextStartingPositionMode . NoSearch ;
254
284
255
285
/// <summary>Gets the leading anchor (e.g. RegexNodeKind.Bol) if one exists and was computed.</summary>
256
- public RegexNodeKind LeadingAnchor { get ; }
286
+ public RegexNodeKind LeadingAnchor { get ; private set ; }
257
287
258
288
/// <summary>Gets the trailing anchor (e.g. RegexNodeKind.End) if one exists and was computed.</summary>
259
289
public RegexNodeKind TrailingAnchor { get ; }
260
290
261
291
/// <summary>Gets the minimum required length an input need be to match the pattern.</summary>
262
292
/// <remarks>0 is a valid minimum length. This value may also be the max (and hence fixed) length of the expression.</remarks>
263
- public int MinRequiredLength { get ; }
293
+ public int MinRequiredLength { get ; private set ; }
264
294
265
295
/// <summary>The maximum possible length an input could be to match the pattern.</summary>
266
296
/// <remarks>
267
297
/// This is currently only set when <see cref="TrailingAnchor"/> is found to be an end anchor.
268
298
/// That can be expanded in the future as needed.
269
299
/// </remarks>
270
- public int ? MaxPossibleLength { get ; }
300
+ public int ? MaxPossibleLength { get ; private set ; }
271
301
272
302
/// <summary>Gets the leading prefix. May be an empty string.</summary>
273
303
public string LeadingPrefix { get ; } = string . Empty ;
0 commit comments