Commit 300777e

Release v0.1
1 parent 3b354ae commit 300777e

37 files changed: +629 -308 lines

README.md

Lines changed: 14 additions & 1 deletion
```diff
@@ -1,2 +1,15 @@
 # CodeIndex
-A Code Searching Tools Based On Lucene.Net
+
+A fast code searching tool based on Lucene.Net
+
+## Use It On Your Local Machine
+
+### Run CodeIndex.ConsoleApp to build your code index
+
+Change the code folder and Lucene index folder in "Program.cs" to your local ones
+Wait for the index build to finish
+
+### Run CodeIndex.Server
+
+Configure appsetting.json => LuceneIndex to point at your local index folder
+Run the server and do the searching
```
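The README's two steps boil down to pointing the indexer and the server at the same folders. A hypothetical sketch of the kind of values the console app expects in CodeIndex.ConsoleApp's Program.cs; the names and paths below are placeholders, not the actual file contents:

```csharp
// Hypothetical sketch only; the real Program.cs may use different names.
static class IndexingSettingsSketch
{
    // Point these at your own machine before running the console app.
    public const string CodeFolderToIndex = @"D:\YourSolution\src";   // source code to be indexed
    public const string LuceneIndexFolder = @"D:\CodeIndex\Index";    // where the Lucene index is written
}
```

CodeIndex.Server's appsetting.json => LuceneIndex setting should then point at that same index folder.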

doc/WebServer-Details.png

87.8 KB

doc/WebServer.png

88.4 KB

src/CodeIndex.Common/ExtendMethods.cs

Lines changed: 6 additions & 1 deletion
```diff
@@ -10,7 +10,12 @@ public static string SubStringSafe(this string str, int startIndex, int length)
 
             if (!string.IsNullOrEmpty(str))
             {
-                result = str.Substring(startIndex, Math.Min(length, str.Length - startIndex));
+                length = Math.Min(length, str.Length - startIndex);
+
+                if (length > 0)
+                {
+                    result = str.Substring(startIndex, length);
+                }
             }
 
             return result;
```
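A minimal usage sketch of the new guard, assuming the extension lives in the CodeIndex.Common namespace and that the method's fallback value is returned when nothing can be taken:

```csharp
using System;
using CodeIndex.Common;

class SubStringSafeDemo
{
    static void Main()
    {
        // The requested length is clamped to what is actually available.
        Console.WriteLine("abcdef".SubStringSafe(2, 100)); // prints "cdef"

        // A start index past the end now produces a negative clamped length,
        // so the new guard skips Substring instead of letting it throw.
        Console.WriteLine("abc".SubStringSafe(5, 2));
    }
}
```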

src/CodeIndex.Common/SearchCandidate.cs

Lines changed: 0 additions & 9 deletions
This file was deleted.

src/CodeIndex.Common/Storage.cs

Lines changed: 1 addition & 1 deletion
```diff
@@ -4,7 +4,7 @@ namespace CodeIndex.Common
 {
     public class Storage
     {
-        Dictionary<string, object> Items { get; set; } = new Dictionary<string, object>();
+        Dictionary<string, object> Items { get; } = new Dictionary<string, object>();
         public string UserName { get; set; }
 
         public object GetValue(string key)
```

src/CodeIndex.IndexBuilder/CodeIndex.IndexBuilder.csproj

Lines changed: 1 addition & 0 deletions
```diff
@@ -7,6 +7,7 @@
   <ItemGroup>
     <PackageReference Include="Lucene.Net" Version="4.8.0-beta00007" />
     <PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00007" />
+    <PackageReference Include="Lucene.Net.Analysis.SmartCn" Version="4.8.0-beta00007" />
     <PackageReference Include="Lucene.Net.QueryParser" Version="4.8.0-beta00007" />
   </ItemGroup>
 
```

src/CodeIndex.IndexBuilder/CodeIndexBuilder.cs

Lines changed: 13 additions & 8 deletions
```diff
@@ -76,26 +76,31 @@ static string ToStringSafe(this string value)
             return value ?? string.Empty;
         }
 
-        public static Document GetDocumentFromSource(CodeSource source, bool needPreprocessing = true)
+        public static Document GetDocumentFromSource(CodeSource source)
        {
-            if (needPreprocessing)
-            {
-                source.Content = SimpleCodeContentProcessing.Preprocessing(source.Content);
-            }
-
             return new Document
             {
+                new TextField(nameof(source.FileName), source.FileName.ToStringSafe(), Field.Store.YES),
                 // StringField indexes but doesn't tokenize
-                new StringField(nameof(source.FileName), source.FileName.ToStringSafe(), Field.Store.YES),
                 new StringField(nameof(source.FileExtension), source.FileExtension.ToStringSafe(), Field.Store.YES),
-                new StringField(nameof(source.FilePath), source.FilePath.ToStringSafe(), Field.Store.YES),
+                new StringField(nameof(source.FilePath) + Constants.NoneTokenizeFieldSuffix, source.FilePath.ToStringSafe(), Field.Store.YES),
+                new TextField(nameof(source.FilePath), source.FilePath.ToStringSafe(), Field.Store.YES),
                 new TextField(nameof(source.Content), source.Content.ToStringSafe(), Field.Store.YES),
                 new Int64Field(nameof(source.IndexDate), source.IndexDate.Ticks, Field.Store.YES),
                 new Int64Field(nameof(source.LastWriteTimeUtc), source.LastWriteTimeUtc.Ticks, Field.Store.YES),
                 new StringField(nameof(source.CodePK), source.CodePK.ToString(), Field.Store.YES)
             };
         }
 
+        public static void UpdateCodeFilePath(Document codeSourceDocument, string oldFullPath, string nowFullPath)
+        {
+            var pathField = codeSourceDocument.Get(nameof(CodeSource.FilePath));
+            codeSourceDocument.RemoveField(nameof(CodeSource.FilePath));
+            codeSourceDocument.RemoveField(nameof(CodeSource.FilePath) + Constants.NoneTokenizeFieldSuffix);
+            codeSourceDocument.Add(new TextField(nameof(CodeSource.FilePath), pathField.Replace(oldFullPath, nowFullPath), Field.Store.YES));
+            codeSourceDocument.Add(new StringField(nameof(CodeSource.FilePath) + Constants.NoneTokenizeFieldSuffix, pathField.Replace(oldFullPath, nowFullPath), Field.Store.YES));
+        }
+
         public static Document GetDocument(string luceneIndex, Term term)
         {
             luceneIndex.RequireNotNullOrEmpty(nameof(luceneIndex));
```
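With FilePath now stored both tokenized and untokenized, exact-path lookups and free-text matching can target different fields, and UpdateCodeFilePath rewrites both copies when a folder is renamed. A minimal sketch, assuming Constants.NoneTokenizeFieldSuffix and the CodeSource/CodeIndexBuilder types are visible from the calling project (the folder paths are placeholders):

```csharp
using CodeIndex.Common;
using CodeIndex.IndexBuilder;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;

static class FilePathFieldSketch
{
    // Exact-match lookups go against the untokenized StringField copy of FilePath.
    public static Query ExactPathQuery(string fullPath) =>
        new TermQuery(new Term(nameof(CodeSource.FilePath) + Constants.NoneTokenizeFieldSuffix, fullPath));

    // When a watched folder is renamed, rewrite both FilePath fields on the cached document.
    public static void MoveTo(Document doc, string oldFolder, string newFolder) =>
        CodeIndexBuilder.UpdateCodeFilePath(doc, oldFolder, newFolder);
}
```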

src/CodeIndex.IndexBuilder/CodeTokenUtils/SimpleCodeAnalyzer.cs renamed to src/CodeIndex.IndexBuilder/CodeTokenUtils/CodeAnalyzer.cs

Lines changed: 3 additions & 3 deletions
```diff
@@ -5,20 +5,20 @@
 
 namespace CodeIndex.IndexBuilder
 {
-    public class SimpleCodeAnalyzer : Analyzer
+    public class CodeAnalyzer : Analyzer
     {
         readonly LuceneVersion luceneVersion;
         readonly bool lowerCase;
 
-        public SimpleCodeAnalyzer(LuceneVersion luceneVersion, bool lowerCase)
+        public CodeAnalyzer(LuceneVersion luceneVersion, bool lowerCase)
         {
             this.luceneVersion = luceneVersion;
             this.lowerCase = lowerCase;
         }
 
         protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
         {
-            var tokenizer = new SimpleCodeTokenizer(luceneVersion, reader);
+            var tokenizer = new CodeTokenizer(reader);
 
             if (lowerCase)
             {
```
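A sketch of how the renamed analyzer might be handed to an IndexWriter; the index path, the LuceneVersion value, and the lowerCase choice are illustrative assumptions rather than the project's actual wiring:

```csharp
using CodeIndex.IndexBuilder;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

class IndexWriterSketch
{
    static void Main()
    {
        var analyzer = new CodeAnalyzer(LuceneVersion.LUCENE_48, lowerCase: true);

        using (var directory = FSDirectory.Open(@"D:\CodeIndex\Index"))
        using (var writer = new IndexWriter(directory, new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)))
        {
            // Documents built by CodeIndexBuilder.GetDocumentFromSource would be added here.
            writer.Commit();
        }
    }
}
```
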
Lines changed: 8 additions & 0 deletions
```diff
@@ -0,0 +1,8 @@
+namespace CodeIndex.IndexBuilder
+{
+    public static class CodeContentProcessing
+    {
+        public const string HighLightPrefix = "0ffc7664bb0";
+        public const string HighLightSuffix = "b17f5526cc3";
+    }
+}
```
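The two constants look like collision-proof markers; a hypothetical sketch of how such markers might be turned into real highlight tags after HTML-encoding a matched fragment (this usage is an assumption, not taken from the commit):

```csharp
using System.Net;
using CodeIndex.IndexBuilder;

static class HighlightSketch
{
    // Encode the fragment first so '<' in the code itself stays safe,
    // then replace the surviving markers with the actual highlight tags.
    public static string ToHtml(string markedFragment) =>
        WebUtility.HtmlEncode(markedFragment)
            .Replace(CodeContentProcessing.HighLightPrefix, "<span class=\"highlight\">")
            .Replace(CodeContentProcessing.HighLightSuffix, "</span>");
}
```
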
Lines changed: 61 additions & 0 deletions
```diff
@@ -0,0 +1,61 @@
+using System.Collections.Generic;
+using System.Globalization;
+using System.IO;
+using ICU4N.Text;
+using Lucene.Net.Analysis.Cn.Smart.Hhmm;
+using Lucene.Net.Analysis.TokenAttributes;
+using Lucene.Net.Analysis.Util;
+
+namespace CodeIndex.IndexBuilder
+{
+    /// <summary>
+    /// Reference the SmartCn Tokenizer
+    /// </summary>
+    internal class CodeTokenizer : SegmentingTokenizerBase
+    {
+        static readonly BreakIterator sentenceProto = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
+        readonly WordSegmenter wordSegmenter = new WordSegmenter();
+
+        readonly ICharTermAttribute termAtt;
+        readonly IOffsetAttribute offsetAtt;
+        readonly ITypeAttribute typeAtt;
+
+        IEnumerator<SegToken> tokens;
+
+        public CodeTokenizer(TextReader reader) : base(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, reader, (BreakIterator)sentenceProto.Clone())
+        {
+            termAtt = AddAttribute<ICharTermAttribute>();
+            offsetAtt = AddAttribute<IOffsetAttribute>();
+            typeAtt = AddAttribute<ITypeAttribute>();
+        }
+
+        protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
+        {
+            var sentence = new string(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
+            tokens = wordSegmenter.SegmentSentence(sentence, m_offset + sentenceStart).GetEnumerator();
+        }
+
+        protected override bool IncrementWord()
+        {
+            if (tokens == null || !tokens.MoveNext())
+            {
+                return false;
+            }
+            else
+            {
+                var token = tokens.Current;
+                ClearAttributes();
+                termAtt.CopyBuffer(token.CharArray, 0, token.CharArray.Length);
+                offsetAtt.SetOffset(CorrectOffset(token.StartOffset), CorrectOffset(token.EndOffset));
+                typeAtt.Type = "word";
+                return true;
+            }
+        }
+
+        public override void Reset()
+        {
+            base.Reset();
+            tokens = null;
+        }
+    }
+}
```
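Because CodeTokenizer is internal, the simplest way to see what it emits is through the public CodeAnalyzer. A minimal sketch; the field name "Content" and the LuceneVersion value are placeholders:

```csharp
using System;
using System.IO;
using CodeIndex.IndexBuilder;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Util;

class TokenDumpSketch
{
    static void Main()
    {
        var analyzer = new CodeAnalyzer(LuceneVersion.LUCENE_48, lowerCase: true);

        using (var stream = analyzer.GetTokenStream("Content", new StringReader("var tokenizer = new CodeTokenizer(reader);")))
        {
            var term = stream.GetAttribute<ICharTermAttribute>();

            stream.Reset();
            while (stream.IncrementToken())
            {
                Console.WriteLine(term.ToString()); // one segmented token per line
            }
            stream.End();
        }
    }
}
```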

src/CodeIndex.IndexBuilder/CodeTokenUtils/SimpleCodeContentProcessing.cs

Lines changed: 0 additions & 68 deletions
This file was deleted.

src/CodeIndex.IndexBuilder/CodeTokenUtils/SimpleCodeTokenizer.cs

Lines changed: 0 additions & 18 deletions
This file was deleted.
Lines changed: 116 additions & 0 deletions
```diff
@@ -0,0 +1,116 @@
+using System.Collections.Generic;
+using System.Linq;
+using Lucene.Net.Analysis.Cn.Smart;
+using Lucene.Net.Analysis.Cn.Smart.Hhmm;
+
+namespace CodeIndex.IndexBuilder
+{
+    /// <summary>
+    /// Reference the SmartCn WordSegmenter
+    /// </summary>
+    internal class WordSegmenter
+    {
+        /// <summary>
+        /// Segment a sentence into words with <see cref="WordSegmenter"/>
+        /// </summary>
+        /// <param name="sentence">input sentence</param>
+        /// <param name="startOffset">start offset of sentence</param>
+        /// <returns><see cref="IList{T}"/> of <see cref="SegToken"/>.</returns>
+        public virtual IList<SegToken> SegmentSentence(string sentence, int startOffset)
+        {
+
+            var segTokenList = GetSegToken(sentence);
+
+            foreach (SegToken st in segTokenList)
+            {
+                ConvertSegToken(st, sentence, startOffset);
+            }
+
+            return segTokenList;
+        }
+
+
+        List<SegToken> emptySegTokenList = new List<SegToken>();
+
+        IList<SegToken> GetSegToken(string sentence)
+        {
+            var segTokenList = emptySegTokenList;
+
+            if (!string.IsNullOrEmpty(sentence))
+            {
+                var charArray = sentence.ToCharArray();
+
+                segTokenList = new List<SegToken>();
+                var length = 0;
+                var startIndex = -1;
+
+                for (var index = 0; index < charArray.Length; index++)
+                {
+                    if (!SpaceLike(charArray[index]))
+                    {
+                        var charInt = (int)charArray[index];
+                        if (IsSpecialChar(charInt))
+                        {
+                            AddSegTokenIfNeeded();
+
+                            segTokenList.Add(new SegToken(charArray, index, index + 1, WordType.STRING, 0));
+                        }
+                        else
+                        {
+                            if (startIndex == -1)
+                            {
+                                startIndex = index;
+                            }
+
+                            length++;
+                        }
+                    }
+                    else
+                    {
+                        AddSegTokenIfNeeded();
+                    }
+                }
+
+                AddSegTokenIfNeeded();
+
+                void AddSegTokenIfNeeded()
+                {
+                    if (length > 0)
+                    {
+                        segTokenList.Add(new SegToken(charArray, startIndex, startIndex + length, WordType.STRING, 0));
+                        length = 0;
+                        startIndex = -1;
+                    }
+                }
+            }
+
+            return segTokenList;
+        }
+
+        public virtual SegToken ConvertSegToken(SegToken st, string sentence,
+            int sentenceStartOffset)
+        {
+
+            st.CharArray = sentence.Substring(st.StartOffset, st.EndOffset - st.StartOffset).ToCharArray();
+            st.StartOffset += sentenceStartOffset;
+            st.EndOffset += sentenceStartOffset;
+
+            return st;
+        }
+
+        static bool SpaceLike(char ch)
+        {
+            return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' || ch == '　'; // the last entry is the full-width (ideographic) space
+        }
+
+        static bool IsSpecialChar(int character) => character >= 33 && character <= 126 && SpecialCharRange.Any(u => u.Start <= character && u.End >= character);
+
+        readonly static HashSet<(int Start, int End)> SpecialCharRange = new HashSet<(int, int)>()
+        {
+            (33, 47),
+            (58, 64),
+            (91, 96),
+            (123, 126)
+        };
+    }
+}
```
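To make the splitting rule concrete: ASCII punctuation in the ranges 33-47, 58-64, 91-96 and 123-126 becomes a single-character token, letters and digits accumulate into word tokens, and whitespace only flushes the current word. A standalone sketch that mirrors that rule for illustration (it does not call the internal WordSegmenter):

```csharp
using System;
using System.Collections.Generic;
using System.Linq;

class SegmentationSketch
{
    static readonly (int Start, int End)[] SpecialCharRanges = { (33, 47), (58, 64), (91, 96), (123, 126) };

    static bool IsSpecial(char c) => SpecialCharRanges.Any(r => c >= r.Start && c <= r.End);

    static IEnumerable<string> Split(string sentence)
    {
        var word = "";
        foreach (var c in sentence)
        {
            if (char.IsWhiteSpace(c) || IsSpecial(c))
            {
                if (word.Length > 0) { yield return word; word = ""; }
                if (IsSpecial(c)) yield return c.ToString(); // punctuation becomes its own token
            }
            else
            {
                word += c; // letters and digits accumulate into a word token
            }
        }
        if (word.Length > 0) yield return word;
    }

    static void Main() =>
        // Prints: var x = Foo . Bar ( 1 ) ;
        Console.WriteLine(string.Join(" ", Split("var x = Foo.Bar(1);")));
}
```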
