@@ -33,10 +33,8 @@ static class ContentValidation
3333 new ( "kind of" , "similar or approximately " )
3434 ] ) ;
3535
36- static List < string > invalidStrings ;
37-
38- static List < string > invalidWords =
39- [
36+ static FrozenSet < string > invalidWordSet = new [ ]
37+ {
4038 "you" ,
4139 "we" ,
4240 "our" ,
@@ -64,13 +62,16 @@ static class ContentValidation
6462 "whereat" ,
6563 "wherein" ,
6664 "whereof"
67- ] ;
68-
69- static ContentValidation ( ) =>
70- invalidStrings = BuildInvalidStrings ( ) . ToList ( ) ;
65+ } . ToFrozenSet ( ) ;
7166
72- static IEnumerable < string > BuildInvalidStrings ( ) =>
73- invalidWords . Select ( word => $ " { word } ") ;
// Index of the known phrases, keyed by the first word of each phrase key.
// The "first word" is the text before the first space in the key, or the
// whole key when it contains no space. Every phrase sharing that first
// word is stored together in one array.
static FrozenDictionary<string, KeyValuePair<string, string>[]> phrasesByFirstWord =
    phrases
        .GroupBy(phrase => phrase.Key.IndexOf(' ') switch
        {
            // No space: the whole key acts as the lookup word.
            -1 => phrase.Key,
            var firstSpace => phrase.Key[..firstSpace]
        })
        .ToFrozenDictionary(group => group.Key, group => group.ToArray());
7475
7576 public static IEnumerable < ( string error , int column ) > Verify ( string line )
7677 {
@@ -88,29 +89,65 @@ static IEnumerable<string> BuildInvalidStrings() =>
8889 yield return ( message , exclamationIndex1 ) ;
8990 }
9091
91- foreach ( var invalidString in invalidStrings )
92+ // Tokenize words with positions
93+ var words = Tokenize ( cleanedLine ) ;
94+
95+ // Check invalid words via set lookup (report first occurrence only)
96+ var seenWords = new HashSet < string > ( ) ;
97+ foreach ( var ( word , start ) in words )
9298 {
93- var indexOf = cleanedLine . IndexOf ( invalidString ) ;
94- if ( indexOf == - 1 )
99+ if ( invalidWordSet . Contains ( word ) && seenWords . Add ( word ) )
95100 {
96- continue ;
101+ yield return ( $ "Invalid word detected: ' { word } '" , start - 1 ) ;
97102 }
103+ }
98104
99- var error = $ "Invalid word detected: '{ invalidString . Trim ( ) } '";
100- yield return ( error , indexOf ) ;
105+ // Check phrases via first-word lookup (report first occurrence only)
106+ var seenPhrases = new HashSet < string > ( ) ;
107+ foreach ( var ( word , start ) in words )
108+ {
109+ if ( phrasesByFirstWord . TryGetValue ( word , out var candidates ) )
110+ {
111+ foreach ( var candidate in candidates )
112+ {
113+ if ( seenPhrases . Contains ( candidate . Key ) )
114+ {
115+ continue ;
116+ }
117+
118+ if ( cleanedLine . AsSpan ( start ) . StartsWith ( candidate . Key . AsSpan ( ) , StringComparison . Ordinal ) )
119+ {
120+ seenPhrases . Add ( candidate . Key ) ;
121+ yield return ( $ "Invalid phrase detected: '{ candidate . Key } '. Instead consider '{ candidate . Value } '", start ) ;
122+ }
123+ }
124+ }
101125 }
126+ }
102127
103- foreach ( var phrase in phrases )
/// <summary>
/// Splits a line into its space-separated words, recording where each one begins.
/// </summary>
/// <param name="cleanedLine">The text to split. Only the space character (' ') separates words.</param>
/// <returns>
/// The words in order of appearance, each paired with the zero-based index of its
/// first character in <paramref name="cleanedLine"/>. Runs of spaces produce no entries.
/// </returns>
static List<(string word, int start)> Tokenize(string cleanedLine)
{
    var tokens = new List<(string word, int start)>();
    var remaining = cleanedLine.AsSpan();

    // Index in cleanedLine of the first character of `remaining`.
    var offset = 0;

    while (!remaining.IsEmpty)
    {
        var nextSpace = remaining.IndexOf(' ');
        var wordLength = nextSpace == -1 ? remaining.Length : nextSpace;

        // A zero-length run means `remaining` starts with a space: nothing to record.
        if (wordLength > 0)
        {
            tokens.Add((remaining[..wordLength].ToString(), offset));
        }

        // Step past the word and, when there is one, the space that ended it.
        var consumed = nextSpace == -1 ? wordLength : wordLength + 1;
        remaining = remaining[consumed..];
        offset += consumed;
    }

    return tokens;
}
115152
116153 static string Clean ( string input )
0 commit comments