Skip to content

Commit 34de823

Browse files
committed
提交一版完善的词法处理,分析模块还需要继续进行
1 parent 04ec6c5 commit 34de823

File tree

5 files changed

+1145
-73
lines changed

5 files changed

+1145
-73
lines changed

src/main/java/io/mycat/MatchMethodGenerator.java

Lines changed: 179 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
package io.mycat;
22

33
import com.alibaba.druid.sql.parser.Token;
4+
import javafx.util.Pair;
45

5-
import java.util.HashMap;
6-
import java.util.List;
7-
import java.util.Map;
6+
import java.io.File;
7+
import java.io.IOException;
8+
import java.nio.file.Files;
9+
import java.nio.file.Paths;
10+
import java.util.*;
811
import java.util.stream.Collectors;
912
import java.util.stream.IntStream;
1013
import java.util.stream.Stream;
@@ -122,49 +125,59 @@ public class MatchMethodGenerator {
122125
map.put("INOUT", Token.INOUT);
123126
}
124127

125-
public static void main(String[] args) {
126-
//isXXXTokenGenerator();
127-
//skipXXXTokenGenerator();
128-
IntStream.range(24, 31).forEach(x -> {
129-
Map<Long, List<Token>> map = Stream.of(Token.values())
130-
.filter((t) -> t.name() != null)
131-
.collect(Collectors.groupingBy((t) -> {
132-
String name = t.name();
133-
char size = (char)name.length();
134-
int b = 378551;
135-
int a = 63689;
136-
int seed = 13131;
137-
long hash = 0;
138-
int low = 0;
139-
int high = 0;
140-
for(int i=0; i<size; i++) {
141-
char c = name.charAt(i);
142-
//BKDRHash
143-
low = low * seed + c;
144-
//RS Hash
145-
high = high * a + c;
146-
a *= b;
147-
};
148-
hash = (long)(high & 0x7FFFFFFF) << 32 | (long)(low & 0x7FFFFFFF);
149-
return (hash & (0xff << x));
150-
// return t.name().chars().sum();
151-
}
152-
));
153-
/*long count = map.entrySet().stream()
154-
.filter((k) -> k.getValue().size() > 2)
155-
.count();
156-
if (count == 0) {
157-
System.out.println("result = "+x);
128+
static final byte[] shrinkCharTbl = new byte[96];//为了压缩hash字符映射空间,再次进行转义
129+
static void initShrinkCharTbl () {
130+
shrinkCharTbl[0] = 1;//从 $ 开始计算
131+
IntStream.rangeClosed('0', '9').forEach(c -> shrinkCharTbl[c-'$'] = (byte)(c-'0'+2));
132+
IntStream.rangeClosed('A', 'Z').forEach(c -> shrinkCharTbl[c-'$'] = (byte)(c-'A'+12));
133+
shrinkCharTbl['_'-'$'] = (byte)38;
134+
}
135+
136+
static void sqlKeyHastTest() {
137+
initShrinkCharTbl();
138+
139+
IntStream.range(0, 54).forEach(x -> {
140+
// Map<Long, List<Token>> map = Stream.of(Token.values())
141+
Map<Long, List<String>> map = null;
142+
try {
143+
map = Files.lines(Paths.get("sql_tokens.txt"))
144+
.collect(Collectors.groupingBy((t) -> {
145+
String name = t;
146+
char size = (char)name.length();
147+
long seed = 41;
148+
long hash = 0;
149+
for(int i=0; i<size; i++) {
150+
byte c = shrinkCharTbl[name.charAt(i)-'$'];
151+
//BKDRHash
152+
hash = hash * seed + c;
153+
};
154+
return (long)((hash & (0x1ffL << (long)x)) >> (long)x);
155+
// return t.name().chars().sum();
156+
}
157+
));
158+
} catch (IOException e) {
159+
e.printStackTrace();
158160
}
159-
*/
160-
System.out.println("result = "+x+" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
161-
map.entrySet().stream()
162-
//.filter((k) -> k.getValue().size() > 1)
163-
.forEach((e) -> System.out.format("%d : %s %n", e.getKey(), e.getValue().toString()));
161+
Map.Entry<Long, List<String>> maxItem = map.entrySet().stream()
162+
//.filter((k) -> k.getValue().size() < 3)
163+
// .count();
164+
.max((a, b) -> a.getValue().size()>b.getValue().size()?1:(a.getValue().size()==b.getValue().size()?0:-1))
165+
.get();
166+
long count = map.entrySet().stream().count();
164167

168+
long max = maxItem.getValue().size();
169+
// if (count == 0) {
170+
System.out.println("result = "+x+" ; max repeat = "+max+" ; count = "+count);
171+
// }
165172

166-
});
167-
//当左移位数为
173+
// System.out.println("result = "+x+" >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
174+
// map.entrySet().stream()
175+
// .filter((k) -> k.getValue().size() > 1)
176+
// .forEach((e) -> System.out.format("%d : %s %n", e.getKey(), e.getValue().toString()));
177+
178+
});}
179+
180+
//当左移位数为以下数值时,SQL关键字8位索引不会发生碰撞
168181
// result = 24
169182
// result = 25
170183
// result = 26
@@ -173,7 +186,109 @@ public static void main(String[] args) {
173186
// result = 29
174187
// result = 30
175188
// result = 31
176-
//.forEach((e) -> System.out.format("%d : %s %n", e.getKey(), e.getValue().toString()));
189+
// }
190+
191+
static long genHash(char[] str) {
192+
int seed = 41;
193+
long hash = 0;
194+
for (char c: str) {
195+
//BKDRHash
196+
hash = hash * seed + shrinkCharTbl[c-'$'];
197+
}
198+
return hash;
199+
}
200+
201+
static boolean cmp(char[] str1, char[] str2) {
202+
if (str1.length == str2.length) {
203+
for (int i=0; i< str1.length; i++) {
204+
if (str1[i] != str2[i]) {
205+
return false;
206+
}
207+
}
208+
return true;
209+
}
210+
return false;
211+
}
212+
213+
static int collideCount = 0;
214+
static long hashCollideTest(final List<Pair<Long, char[]>> sqlKeys, final ArrayList<Character> srcArray,
215+
final int maxDepth, final int depth, final char[] str,
216+
final long totalCount, final long count) {
217+
long newCount = count;
218+
for (Character c: srcArray) {
219+
str[depth] = c;
220+
if (depth < maxDepth -1)
221+
newCount = hashCollideTest(sqlKeys, srcArray, maxDepth, depth+1, str, totalCount, newCount);
222+
else {
223+
final long hash = genHash(str);
224+
sqlKeys.forEach(x -> {
225+
if (x.getKey() == hash && !cmp(x.getValue(), str)) {
226+
collideCount++;
227+
System.out.println("Key '"+String.valueOf(x.getValue())+"' collides with '"+String.valueOf(str)+"' with hash : "+hash);
228+
}
229+
});
230+
newCount++;
231+
if (newCount%10000000 == 1) {
232+
Date now=new Date();
233+
System.out.println(now.toLocaleString()+" progress : "+newCount+"/"+totalCount);
234+
}
235+
}
236+
}
237+
return newCount;
238+
}
239+
240+
static void run() {
241+
ArrayList<Character> srcArray = new ArrayList<>();
242+
IntStream.range('A', 'Z').forEach(c -> srcArray.add((char)c));
243+
IntStream.range('0', '9').forEach(c -> srcArray.add((char)c));
244+
srcArray.add('_');
245+
srcArray.add('-');
246+
247+
List<Pair<Long, char[]>> sqlKeys = Stream.of(Token.values())
248+
.filter(t -> t.name()!=null)
249+
.map(x -> new Pair<>(genHash(x.name().toCharArray()), x.name().toCharArray()))
250+
.collect(Collectors.toList());
251+
252+
collideCount = 0;
253+
int maxDepth = 7;
254+
long totalCount = srcArray.size();
255+
for(int i=0; i<maxDepth-1; i++) totalCount *= srcArray.size();
256+
char[] str = new char[maxDepth];
257+
long count = hashCollideTest(sqlKeys, srcArray, maxDepth, 0, str, totalCount, 0);
258+
Date now = new Date();
259+
if (count != totalCount) {
260+
System.out.println(now.toLocaleString()+" finished : "+count+"/"+totalCount+" collideCount="+collideCount);
261+
} else {
262+
System.out.println(now.toLocaleString()+" success!"+" collideCount="+collideCount);
263+
264+
}
265+
}
266+
267+
static int BKRDHash(String str) {
268+
int hash = 0;
269+
int seed = 131;
270+
for(char c: str.toCharArray()) {
271+
hash = hash*seed + c;
272+
}
273+
return hash;
274+
}
275+
276+
static int RSHash(String str) {
277+
int b = 378551;
278+
int a = 63689;
279+
int hash = 0;
280+
for(char c: str.toCharArray()) {
281+
hash = hash*a + c;
282+
a *= b;
283+
}
284+
return hash;
285+
}
286+
287+
static void test1() {
288+
String a = "abcdefghijklmnopqrstuvwxyz";
289+
String b = "abcdefghijklmnopqrstuvwxyz";
290+
System.out.println(a+" : "+RSHash(a));
291+
System.out.println(b+" : "+RSHash(b));
177292
}
178293

179294
/**
@@ -210,4 +325,24 @@ static void skipXXXTokenGenerator() {
210325
System.out.format("final void skip%sToken() {\npos+=%d;\n}%n", keyword, keyword.length());
211326
});
212327
}
328+
static void GenerateSqlTokenHash() {
329+
initShrinkCharTbl();
330+
try {
331+
Files.lines(Paths.get("sql_tokens.txt")).forEach(x -> {
332+
System.out.format(" public static final long %s = 0x%xL;%n", x, genHash(x.toCharArray()));
333+
});
334+
// System.out.println("conflict count : "+count);
335+
} catch (IOException e) {
336+
e.printStackTrace();
337+
}
338+
}
339+
340+
public static void main(String[] args) {
341+
//isXXXTokenGenerator();
342+
//skipXXXTokenGenerator();
343+
// sqlKeyHastTest();
344+
// run();
345+
// test1();
346+
GenerateSqlTokenHash();
347+
}
213348
}

0 commit comments

Comments
 (0)