Skip to content

Commit 2901b54

Browse files
authored
use Frozen for ContentValidation (#749)
1 parent 54f8cc7 commit 2901b54

2 files changed

Lines changed: 62 additions & 25 deletions

File tree

src/MarkdownSnippets/ContentValidation.cs

Lines changed: 58 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,8 @@ static class ContentValidation
3333
new("kind of", "similar or approximately ")
3434
]);
3535

36-
static List<string> invalidStrings;
37-
38-
static List<string> invalidWords =
39-
[
36+
static FrozenSet<string> invalidWordSet = new[]
37+
{
4038
"you",
4139
"we",
4240
"our",
@@ -64,13 +62,16 @@ static class ContentValidation
6462
"whereat",
6563
"wherein",
6664
"whereof"
67-
];
68-
69-
static ContentValidation() =>
70-
invalidStrings = BuildInvalidStrings().ToList();
65+
}.ToFrozenSet();
7166

72-
static IEnumerable<string> BuildInvalidStrings() =>
73-
invalidWords.Select(word => $" {word} ");
67+
static FrozenDictionary<string, KeyValuePair<string, string>[]> phrasesByFirstWord =
68+
phrases
69+
.GroupBy(p =>
70+
{
71+
var spaceIndex = p.Key.IndexOf(' ');
72+
return spaceIndex == -1 ? p.Key : p.Key[..spaceIndex];
73+
})
74+
.ToFrozenDictionary(g => g.Key, g => g.ToArray());
7475

7576
public static IEnumerable<(string error, int column)> Verify(string line)
7677
{
@@ -88,29 +89,65 @@ static IEnumerable<string> BuildInvalidStrings() =>
8889
yield return (message, exclamationIndex1);
8990
}
9091

91-
foreach (var invalidString in invalidStrings)
92+
// Tokenize words with positions
93+
var words = Tokenize(cleanedLine);
94+
95+
// Check invalid words via set lookup (report first occurrence only)
96+
var seenWords = new HashSet<string>();
97+
foreach (var (word, start) in words)
9298
{
93-
var indexOf = cleanedLine.IndexOf(invalidString);
94-
if (indexOf == -1)
99+
if (invalidWordSet.Contains(word) && seenWords.Add(word))
95100
{
96-
continue;
101+
yield return ($"Invalid word detected: '{word}'", start - 1);
97102
}
103+
}
98104

99-
var error = $"Invalid word detected: '{invalidString.Trim()}'";
100-
yield return (error, indexOf);
105+
// Check phrases via first-word lookup (report first occurrence only)
106+
var seenPhrases = new HashSet<string>();
107+
foreach (var (word, start) in words)
108+
{
109+
if (phrasesByFirstWord.TryGetValue(word, out var candidates))
110+
{
111+
foreach (var candidate in candidates)
112+
{
113+
if (seenPhrases.Contains(candidate.Key))
114+
{
115+
continue;
116+
}
117+
118+
if (cleanedLine.AsSpan(start).StartsWith(candidate.Key.AsSpan(), StringComparison.Ordinal))
119+
{
120+
seenPhrases.Add(candidate.Key);
121+
yield return ($"Invalid phrase detected: '{candidate.Key}'. Instead consider '{candidate.Value}'", start);
122+
}
123+
}
124+
}
101125
}
126+
}
102127

103-
foreach (var phrase in phrases)
128+
static List<(string word, int start)> Tokenize(string cleanedLine)
129+
{
130+
var words = new List<(string word, int start)>();
131+
var span = cleanedLine.AsSpan();
132+
var pos = 0;
133+
while (pos < span.Length)
104134
{
105-
var indexOf = cleanedLine.IndexOf(phrase.Key);
106-
if (indexOf == -1)
135+
if (span[pos] == ' ')
107136
{
137+
pos++;
108138
continue;
109139
}
110140

111-
var error = $"Invalid phrase detected: '{phrase.Key}'. Instead consider '{phrase.Value}'";
112-
yield return (error, indexOf);
141+
var wordStart = pos;
142+
while (pos < span.Length && span[pos] != ' ')
143+
{
144+
pos++;
145+
}
146+
147+
words.Add((span[wordStart..pos].ToString(), wordStart));
113148
}
149+
150+
return words;
114151
}
115152

116153
static string Clean(string input)

src/Tests/ContentValidationTest.CheckInvalidWordIndicatesAllViolationsInTheExceptionMessageIgnoringCase.verified.txt

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,12 +7,12 @@
77
Item1: Invalid word detected: 'you',
88
Item2: 1
99
},
10-
{
11-
Item1: Invalid word detected: 'us',
12-
Item2: 37
13-
},
1410
{
1511
Item1: Invalid word detected: 'yourself',
1612
Item2: 27
13+
},
14+
{
15+
Item1: Invalid word detected: 'us',
16+
Item2: 37
1717
}
1818
]

0 commit comments

Comments (0)