@@ -50,11 +50,11 @@ private RegexPrefixAnalyzer(Span<int> intStack)
5050 _skipAllChildren = false ;
5151 }
5252
53- /// <summary>Computes the leading substring in <paramref name="tree "/>; may be empty.</summary>
54- public static string FindCaseSensitivePrefix ( RegexTree tree )
53+ /// <summary>Computes the leading substring in <paramref name="node"/>; may be empty.</summary>
54+ public static string FindCaseSensitivePrefix ( RegexNode node )
5555 {
5656 var vsb = new ValueStringBuilder ( stackalloc char [ 64 ] ) ;
57- Process ( tree . Root , ref vsb ) ;
57+ Process ( node , ref vsb ) ;
5858 return vsb . ToString ( ) ;
5959
6060 // Processes the node, adding any prefix text to the builder.
@@ -87,6 +87,59 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb)
8787 return ! rtl ;
8888 }
8989
90+ // Alternation: find a string that's a shared prefix of all branches
91+ case RegexNodeKind . Alternate :
92+ {
93+ int childCount = node . ChildCount ( ) ;
94+
95+ // Store the initial branch into the target builder, keeping track
96+ // of how much was appended. Any of this content that doesn't overlap
97+ // with every other branch will be removed before returning.
98+ int initialLength = vsb . Length ;
99+ Process ( node . Child ( 0 ) , ref vsb ) ;
100+ int addedLength = vsb . Length - initialLength ;
101+
102+ // Then explore the rest of the branches, finding the length
103+ // of prefix they all share in common with the initial branch.
104+ if ( addedLength != 0 )
105+ {
106+ var alternateSb = new ValueStringBuilder ( 64 ) ;
107+
108+ // Process each branch. If we reach a point where we've proven there's
109+ // no overlap, we can bail early.
110+ for ( int i = 1 ; i < childCount && addedLength != 0 ; i ++ )
111+ {
112+ alternateSb . Length = 0 ;
113+
114+ // Process the branch into a temporary builder.
115+ Process ( node . Child ( i ) , ref alternateSb ) ;
116+
117+ // Find how much overlap there is between this branch's prefix
118+ // and the smallest amount of prefix that overlapped with all
119+ // the previously seen branches.
120+ addedLength = Math . Min ( addedLength , alternateSb . Length ) ;
121+ for ( int j = 0 ; j < addedLength ; j ++ )
122+ {
123+ if ( vsb [ initialLength + j ] != alternateSb [ j ] )
124+ {
125+ addedLength = j ;
126+ break ;
127+ }
128+ }
129+ }
130+
131+ alternateSb . Dispose ( ) ;
132+
133+ // Then cull back on what was added based on the other branches.
134+ vsb . Length = initialLength + addedLength ;
135+ }
136+
137+ // Don't explore anything after the alternation. We could make this work if desirable,
138+ // but it's currently not worth the extra complication. The entire contents of every
139+ // branch would need to be identical other than zero-width anchors/assertions.
140+ return false ;
141+ }
142+
90143 // One character
91144 case RegexNodeKind . One when ( node . Options & RegexOptions . IgnoreCase ) == 0 :
92145 vsb . Append ( node . Ch ) ;
0 commit comments