Skip to content

Commit ab1caa0

Browse files
committed
Add ut and word hint logic
1 parent c88294b commit ab1caa0

File tree

5 files changed

+181
-33
lines changed

5 files changed

+181
-33
lines changed

src/CodeIndex.IndexBuilder/WordsHintBuilder.cs

+22-4
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
using System.Collections.Generic;
2+
using System.Linq;
23
using CodeIndex.Common;
34
using Lucene.Net.Documents;
45

56
namespace CodeIndex.IndexBuilder
67
{
78
public static class WordsHintBuilder
89
{
9-
// TODO: Add test
10-
11-
static HashSet<string> Words { get; } = new HashSet<string>();
10+
public static HashSet<string> Words { get; } = new HashSet<string>();
1211

1312
public static void BuildIndexByBatch(CodeIndexConfiguration config, bool triggerMerge, bool applyAllDeletes, bool needFlush, ILog log, int batchSize = 10000)
1413
{
@@ -44,7 +43,7 @@ public static void BuildIndexByBatch(CodeIndexConfiguration config, bool trigger
4443
log?.Info($"Finished build hint words for {config.LuceneIndexForHint}");
4544
}
4645

47-
internal static void AddWords(string[] words)
46+
public static void AddWords(string[] words)
4847
{
4948
foreach (var word in words)
5049
{
@@ -55,6 +54,25 @@ internal static void AddWords(string[] words)
5554
}
5655
}
5756

57+
/// <summary>
/// Writes one hint document per qualifying word into the hint index
/// identified by <c>config.LuceneIndexForHint</c>.
/// </summary>
/// <remarks>
/// Only distinct words longer than three characters are written. Each document
/// stores the word as-is (<c>CodeWord.Word</c>) and its invariant lower-case form
/// (<c>CodeWord.WordLower</c>). The write goes through <c>LucenePool.UpdateIndex</c>
/// with a term on <c>CodeWord.Word</c>, so matching is case-sensitive on the
/// original spelling (presumably replace-or-add semantics; confirm against LucenePool).
/// </remarks>
/// <param name="config">Configuration supplying the hint index location.</param>
/// <param name="words">Candidate words. A null array and null entries are ignored.</param>
/// <param name="log">Optional logger; may be null.</param>
public static void UpdateWordsAndUpdateIndex(CodeIndexConfiguration config, string[] words, ILog log)
{
    // A null array used to throw on words.Length; report it as an empty input instead.
    log?.Info($"Update hint index start, words count {words?.Length ?? 0}");

    // Null entries are dropped before the length filter so u.Length cannot throw.
    var hintWords = (words ?? Enumerable.Empty<string>())
        .Where(u => u != null && u.Length > 3)
        .Distinct()
        .ToArray();

    foreach (var word in hintWords)
    {
        var document = new Document
        {
            new StringField(nameof(CodeWord.Word), word, Field.Store.YES),
            new StringField(nameof(CodeWord.WordLower), word.ToLowerInvariant(), Field.Store.YES)
        };

        LucenePool.UpdateIndex(config.LuceneIndexForHint, new Lucene.Net.Index.Term(nameof(CodeWord.Word), word), document);
    }

    log?.Info("Update hint index finished");
}
75+
5876
static void BuildIndex(CodeIndexConfiguration config, bool triggerMerge, bool applyAllDeletes, List<Document> documents, bool needFlush, ILog log)
5977
{
6078
log?.Info($"Build index start, documents count {documents.Count}");

src/CodeIndex.MaintainIndex/CodeFilesIndexMaintainer.cs

+10-18
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ namespace CodeIndex.MaintainIndex
1616
{
1717
public class CodeFilesIndexMaintainer : IDisposable
1818
{
19-
// TODO: Add word hint when file changed
20-
2119
public CodeFilesIndexMaintainer(CodeIndexConfiguration config, string[] excludedExtensions, string[] excludedPaths, int saveIntervalSeconds = 300, string[] includedExtensions = null, ILog log = null)
2220
{
2321
config.RequireNotNull(nameof(config));
@@ -80,7 +78,6 @@ public void Dispose()
8078
LucenePool.SaveResultsAndClearLucenePool(config);
8179
}
8280

83-
// TODO: Add a boolean field to determine initialize is finished
8481
FileSystemWatcher FileSystemWatcher { get; set; }
8582
const int WaitMilliseconds = 100;
8683

@@ -157,18 +154,10 @@ void RenamedEventHandler(object sender, RenamedEventArgs e)
157154

158155
bool IsExcludedFromIndex(FileSystemEventArgs e)
159156
{
160-
var excluded = true;
161-
162-
if (IsFile(e.FullPath))
163-
{
164-
excluded = excludedPaths.Any(u => e.FullPath.ToUpperInvariant().Contains(u))
157+
var excluded = excludedPaths.Any(u => e.FullPath.ToUpperInvariant().Contains(u))
158+
|| excludedPaths.Any(u => e.FullPath.ToUpperInvariant().Contains(u))
165159
|| excludedExtensions.Any(u => e.FullPath.EndsWith(u, StringComparison.InvariantCultureIgnoreCase))
166160
|| includedExtensions != null && !includedExtensions.Any(u => e.FullPath.EndsWith(u, StringComparison.InvariantCultureIgnoreCase));
167-
}
168-
else if (IsDirectory(e.FullPath))
169-
{
170-
excluded = excludedPaths.Any(u => e.FullPath.ToUpperInvariant().Contains(u));
171-
}
172161

173162
if (excluded)
174163
{
@@ -191,6 +180,7 @@ void CreateNewIndex(string fullPath, PendingRetrySource pendingRetrySource = nul
191180
{
192181
var content = FilesContentHelper.ReadAllText(fullPath);
193182
CodeIndexBuilder.BuildIndex(config, false, false, false, new[] { CodeSource.GetCodeSource(fileInfo, content) });
183+
WordsHintBuilder.UpdateWordsAndUpdateIndex(config, WordSegmenter.GetWords(content), log);
194184
pendingChanges++;
195185
}
196186
}
@@ -217,9 +207,9 @@ void UpdateIndex(string fullPath, PendingRetrySource pendingRetrySource = null)
217207
if (fileInfo.Exists)
218208
{
219209
var content = FilesContentHelper.ReadAllText(fullPath);
220-
// TODO: When Date Not Change, Not Update
221210
var document = CodeIndexBuilder.GetDocumentFromSource(CodeSource.GetCodeSource(fileInfo, content));
222211
CodeIndexBuilder.UpdateIndex(config.LuceneIndexForCode, GetNoneTokenizeFieldTerm(nameof(CodeSource.FilePath), fullPath), document);
212+
WordsHintBuilder.UpdateWordsAndUpdateIndex(config, WordSegmenter.GetWords(content), log);
223213
pendingChanges++;
224214
}
225215
}
@@ -324,7 +314,7 @@ void AddFileChangesToRetrySouce(string fullPath, WatcherChangeTypes changesType,
324314
pendingRetryCodeSources.Enqueue(pendingRetrySource);
325315
}
326316

327-
ConcurrentQueue<PendingRetrySource> pendingRetryCodeSources = new ConcurrentQueue<PendingRetrySource>();
317+
// Queue of file changes that are waiting to be retried.
// Declared protected so that derived classes can reach the queue directly.
protected ConcurrentQueue<PendingRetrySource> pendingRetryCodeSources = new ConcurrentQueue<PendingRetrySource>();
328318

329319
bool IsFile(string path)
330320
{
@@ -348,9 +338,9 @@ void RetryAllFailed(CancellationToken cancellationToken)
348338

349339
Task.Run(() =>
350340
{
351-
if (pendingRetrySource.LastRetryUTCDate > DateTime.UtcNow.AddSeconds(-10)) // Failed In 10 Seconds
341+
if (pendingRetrySource.LastRetryUTCDate > DateTime.UtcNow.AddSeconds(-5)) // Failed In 5 Seconds
352342
{
353-
Thread.Sleep(10000);
343+
Thread.Sleep(5000);
354344
}
355345

356346
switch (pendingRetrySource.ChangesType)
@@ -376,11 +366,13 @@ void RetryAllFailed(CancellationToken cancellationToken)
376366
}
377367
else
378368
{
379-
Thread.Sleep(10000); // Sleep 10 seconds when nothing need to requeue
369+
Thread.Sleep(SleepMilliseconds); // Sleep when nothing need to requeue
380370
}
381371
}
382372
}
383373

374+
/// <summary>
/// Number of milliseconds the retry loop sleeps when nothing needs to be requeued.
/// Virtual so that a derived class can substitute a different wait.
/// </summary>
protected virtual int SleepMilliseconds => 10000;
375+
384376
int pendingChanges = 0;
385377
DateTime lastSaveDate = DateTime.UtcNow;
386378

src/CodeIndex.MaintainIndex/PendingRetrySource.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
namespace CodeIndex.MaintainIndex
55
{
6-
class PendingRetrySource
6+
public class PendingRetrySource
77
{
88
public string FilePath { get; set; }
99
public string OldPath { get; set; }
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
using System.Linq;
2+
using CodeIndex.Common;
3+
using CodeIndex.IndexBuilder;
4+
using Lucene.Net.Search;
5+
using NUnit.Framework;
6+
7+
namespace CodeIndex.Test
8+
{
9+
/// <summary>
/// Tests for <see cref="WordsHintBuilder"/>: collecting words, building the hint
/// index in batch, and updating the hint index with additional words.
/// </summary>
/// <remarks>
/// <c>WordsHintBuilder.Words</c> is static state shared between tests, so it is
/// cleared both before and after every test.
/// </remarks>
public class WordsHintBuilderTest : BaseTest
{
    /// <summary>
    /// Words of three characters or fewer and repeated words are not collected.
    /// </summary>
    [Test]
    public void TestAddWords()
    {
        WordsHintBuilder.AddWords(new[] { "AAAA", "BBBB", "Ddddd", "AAAA", "EEE" });

        Assert.AreEqual(3, WordsHintBuilder.Words.Count, "Length must larger than 3 and skip duplicate");
        CollectionAssert.AreEquivalent(new[] { "AAAA", "BBBB", "Ddddd" }, WordsHintBuilder.Words);
    }

    /// <summary>
    /// Every collected word ends up in the hint index with both its original
    /// spelling and its lower-case form.
    /// </summary>
    [Test]
    public void TestBuildIndexByBatch()
    {
        WordsHintBuilder.AddWords(new[] { "AAAA", "Bbbb", "DDDDD" });
        WordsHintBuilder.BuildIndexByBatch(Config, true, true, true, null);

        var docs = LucenePool.Search(Config.LuceneIndexForHint, new MatchAllDocsQuery(), 1000);

        Assert.AreEqual(3, docs.Length);
        CollectionAssert.AreEquivalent(new[] { "AAAA", "Bbbb", "DDDDD" }, docs.Select(u => u.Get(nameof(CodeWord.Word))));
        CollectionAssert.AreEquivalent(new[] { "aaaa", "bbbb", "ddddd" }, docs.Select(u => u.Get(nameof(CodeWord.WordLower))));
    }

    /// <summary>
    /// Updating with a mix of already-indexed, new and too-short words adds only
    /// the new word: existing words are not duplicated and "ABC" is skipped.
    /// </summary>
    [Test]
    public void TestUpdateWordsAndUpdateIndex()
    {
        WordsHintBuilder.AddWords(new[] { "AAAA", "Bbbbb", "DDDDD" });
        WordsHintBuilder.BuildIndexByBatch(Config, true, true, true, null);

        var docs = LucenePool.Search(Config.LuceneIndexForHint, new MatchAllDocsQuery(), 1000);
        Assert.AreEqual(3, docs.Length);

        WordsHintBuilder.UpdateWordsAndUpdateIndex(Config, new[] { "AAAA", "Bbbbb", "EEEEE", "ABC" }, null);
        docs = LucenePool.Search(Config.LuceneIndexForHint, new MatchAllDocsQuery(), 1000);

        // Message typo fixed ("muse" -> "must") so a failure reads correctly.
        Assert.AreEqual(4, docs.Length, "Skip duplicate and length must larger than 3");
        CollectionAssert.AreEquivalent(new[] { "AAAA", "Bbbbb", "DDDDD", "EEEEE" }, docs.Select(u => u.Get(nameof(CodeWord.Word))));
        CollectionAssert.AreEquivalent(new[] { "aaaa", "bbbbb", "ddddd", "eeeee" }, docs.Select(u => u.Get(nameof(CodeWord.WordLower))));
    }

    /// <summary>
    /// Runs the base set-up, then starts each test with an empty word set.
    /// </summary>
    [SetUp]
    protected override void SetUp()
    {
        base.SetUp();
        WordsHintBuilder.Words.Clear();
    }

    /// <summary>
    /// Runs the base tear-down, then clears the word set so nothing leaks into other tests.
    /// </summary>
    [TearDown]
    protected override void TearDown()
    {
        base.TearDown();
        WordsHintBuilder.Words.Clear();
    }
}
61+
}
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
using System;
2+
using System.Collections.Concurrent;
23
using System.IO;
4+
using System.Linq;
35
using System.Threading;
46
using CodeIndex.Common;
57
using CodeIndex.IndexBuilder;
68
using CodeIndex.MaintainIndex;
79
using CodeIndex.Search;
10+
using Lucene.Net.Search;
811
using NUnit.Framework;
912

1013
namespace CodeIndex.Test
@@ -14,7 +17,7 @@ public class CodeFilesIndexMaintainerTest : BaseTest
1417
[Test]
1518
public void TestMaintainerIndex()
1619
{
17-
var waitMS = 2000;
20+
var waitMS = 1500;
1821
Directory.CreateDirectory(MonitorFolder);
1922
Directory.CreateDirectory(Path.Combine(MonitorFolder, "FolderA"));
2023
Directory.CreateDirectory(Path.Combine(MonitorFolder, "FolderB"));
@@ -27,23 +30,97 @@ public void TestMaintainerIndex()
2730
File.Create(fileBPath).Close();
2831
File.AppendAllText(fileBPath, "this is a content for test, that's it\r\na new line;");
2932

33+
var fileCPath = Path.Combine(MonitorFolder, "CCC.xml");
34+
File.Create(fileCPath).Close();
35+
File.AppendAllText(fileCPath, "this is a content for test");
36+
37+
var fileDPath = Path.Combine(MonitorFolder, "DDD.txt");
38+
3039
CodeIndexBuilder.BuildIndex(Config, true, true, true,
31-
new[] { CodeSource.GetCodeSource(new FileInfo(fileAPath), "12345"),
32-
CodeSource.GetCodeSource(new FileInfo(fileBPath), "this is a content for test, that's it\r\na new line;") });
40+
new[]
41+
{
42+
CodeSource.GetCodeSource(new FileInfo(fileAPath), "12345"),
43+
CodeSource.GetCodeSource(new FileInfo(fileBPath), "this is a content for test, that's it\r\na new line;"),
44+
CodeSource.GetCodeSource(new FileInfo(fileCPath), "this is a content for test")
45+
});
46+
3347
LucenePool.SaveResultsAndClearLucenePool(Config.LuceneIndexForCode);
3448

35-
using var maintainer = new CodeFilesIndexMaintainer(Config, new[] { "dll" }, Array.Empty<string>(), 1);
49+
var codeSources = CodeIndexSearcher.SearchCode(Config.LuceneIndexForCode, new MatchAllDocsQuery(), 100);
50+
CollectionAssert.AreEquivalent(new[] { "AAA.cs", "BBB.xml", "CCC.xml" }, codeSources.Select(u => u.FileName));
51+
52+
using var maintainer = new CodeFilesIndexMaintainer(Config, new[] { ".dll" }, Array.Empty<string>(), 1);
3653
maintainer.StartWatch();
3754
maintainer.SetInitalizeFinishedToTrue();
38-
File.AppendAllText(fileAPath, "56789");
55+
56+
File.AppendAllText(fileAPath, "56789"); // Changed
57+
File.Delete(fileBPath); // Deleted
58+
File.Move(fileCPath, Path.Combine(MonitorFolder, "NewCCC.xml")); // Rename
59+
File.Create(fileDPath).Close(); // Created
60+
3961
Thread.Sleep(waitMS); // wait task finish saving
4062

41-
var index = CodeIndexSearcher.Search(Config.LuceneIndexForCode, Generator.GetQueryFromStr($"{nameof(CodeSource.FileName)}:\"AAA.cs\""), 10);
42-
Assert.AreEqual(1, index.Length);
43-
Assert.AreEqual("1234556789", index[0].Get(nameof(CodeSource.Content)));
63+
codeSources = CodeIndexSearcher.SearchCode(Config.LuceneIndexForCode, new MatchAllDocsQuery(), 100);
64+
65+
Assert.Multiple(() =>
66+
{
67+
CollectionAssert.AreEquivalent(new[] { "AAA.cs", "NewCCC.xml", "DDD.txt" }, codeSources.Select(u => u.FileName));
68+
CollectionAssert.AreEquivalent(new[] { "1234556789", "this is a content for test", string.Empty }, codeSources.Select(u => u.Content));
69+
CollectionAssert.AreEquivalent(new[] { fileAPath, Path.Combine(MonitorFolder, "NewCCC.xml"), fileDPath }, codeSources.Select(u => u.FilePath));
70+
});
71+
72+
maintainer.Dispose();
73+
}
74+
75+
/// <summary>
/// Verifies that an entry placed on the pending-retry queue is eventually
/// processed and the corresponding file shows up in the code index.
/// </summary>
[Test]
public void TestMaintainerIndex_RetryFailed()
{
    var waitMS = 1500;
    Directory.CreateDirectory(MonitorFolder);

    var fileAPath = Path.Combine(MonitorFolder, "AAA.cs");
    File.Create(fileAPath).Close();
    File.AppendAllText(fileAPath, "12345");

    // The test subclass exposes the retry queue and overrides SleepMilliseconds with 100.
    using var maintainer = new CodeFilesIndexMaintainerForTest(Config, new[] { ".dll" }, Array.Empty<string>(), 1);
    maintainer.StartWatch();
    maintainer.SetInitalizeFinishedToTrue();

    maintainer.PendingRetryCodeSources.Enqueue(new PendingRetrySource()
    {
        ChangesType = WatcherChangeTypes.Created,
        FilePath = fileAPath,
        // The field holds a UTC timestamp, so build it from UtcNow rather than local time.
        // One day in the past keeps the entry well outside any "failed recently" window.
        LastRetryUTCDate = DateTime.UtcNow.AddDays(-1)
    });

    var retryTime = 3;
    var codeSources = Array.Empty<CodeSource>();

    // Poll a bounded number of times instead of relying on a single fixed wait.
    while (retryTime > 0)
    {
        Thread.Sleep(waitMS); // wait task finish saving
        retryTime--;
        codeSources = CodeIndexSearcher.SearchCode(Config.LuceneIndexForCode, new MatchAllDocsQuery(), 100);

        if (codeSources.Length > 0)
        {
            break;
        }
    }

    Assert.AreEqual(1, codeSources.Length);
    Assert.AreEqual("AAA.cs", codeSources[0].FileName);
}
45114

46-
// TODO: Test False Tolerance
47-
// TODO: Test Directoy Change
115+
/// <summary>
/// Test-only subclass of <see cref="CodeFilesIndexMaintainer"/> that exposes the
/// protected retry queue and replaces the sleep interval with 100 milliseconds.
/// </summary>
class CodeFilesIndexMaintainerForTest : CodeFilesIndexMaintainer
{
    /// <summary>Sleep interval override: 100 milliseconds.</summary>
    protected override int SleepMilliseconds
    {
        get { return 100; }
    }

    /// <summary>Gives tests direct access to the protected retry queue.</summary>
    public ConcurrentQueue<PendingRetrySource> PendingRetryCodeSources
    {
        get { return pendingRetryCodeSources; }
    }

    /// <summary>Forwards every argument unchanged to the base constructor.</summary>
    public CodeFilesIndexMaintainerForTest(
        CodeIndexConfiguration config,
        string[] excludedExtensions,
        string[] excludedPaths,
        int saveIntervalSeconds = 300,
        string[] includedExtensions = null,
        ILog log = null)
        : base(config, excludedExtensions, excludedPaths, saveIntervalSeconds, includedExtensions, log)
    {
    }
}
48125
}
49126
}

0 commit comments

Comments
 (0)