Skip to content

Commit 46d0083

Browse files
committed
新增基于token hash的解析器,暂时只做了词法切割和hash,还没做分析
1 parent 72899d2 commit 46d0083

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
package io.mycat;
2+
3+
import java.nio.charset.StandardCharsets;
4+
import java.util.stream.IntStream;
5+
6+
/**
7+
* Created by Kaiz on 2017/2/6.
8+
*/
9+
public class NewSQLParser {
10+
SQLContext context;
11+
SQLReader reader;
12+
13+
class HashArray {
14+
long[] hashArray = new long[1024];
15+
int pos = 0;
16+
17+
void init() {
18+
while(pos>=0) {
19+
hashArray[pos--] = 0;
20+
}
21+
pos = 0;
22+
};
23+
void set(long hash) { hashArray[pos++] = hash; }
24+
long get(int idx) { return hashArray[idx]; }
25+
int getCount() {return pos;}
26+
}
27+
28+
final byte[] charType = new byte[128];
29+
final short[] tokenPos = new short[1024];
30+
HashArray hashArray = new HashArray();
31+
32+
void init() {
33+
IntStream.range('A', 'Z').forEach(c -> charType[c] = 1);
34+
IntStream.range('a', 'z').forEach(c -> charType[c] = 1);
35+
charType['_'] = 1;
36+
}
37+
38+
int parseToken(byte[] sql, int pos, int sqlLength) {
39+
int tp = 0;
40+
byte c = (byte)(sql[pos] & 0xDF);
41+
long hash = c;
42+
pos++;
43+
while (pos < sqlLength && charType[c = sql[pos]] == 1) {
44+
hash += (long)(c & 0xDF) << (tp & 0xFF);
45+
pos++;
46+
tp++;
47+
}
48+
hashArray.set(hash);
49+
return pos;
50+
}
51+
52+
void tokenize(byte[] sql) {
53+
int sqlLength = sql.length;
54+
int pos = 0;
55+
hashArray.init();
56+
while (pos < sqlLength) {
57+
if (charType[sql[pos]]==1) {
58+
pos = parseToken(sql, pos, sqlLength);
59+
} else {
60+
pos++;
61+
}
62+
}
63+
}
64+
65+
static long RunBench(byte[] src, NewSQLParser parser) {
66+
int count = 0;
67+
long start = System.currentTimeMillis();
68+
do {
69+
parser.tokenize(src);
70+
} while (count++ < 10_000_000);
71+
return System.currentTimeMillis() - start;
72+
}
73+
74+
public static void main(String[] args) {
75+
NewSQLParser parser = new NewSQLParser();
76+
parser.init();
77+
long min = 0;
78+
byte[] src = "SELECT a FROM ab , ee.ff AS f,(SELECT a FROM `schema_bb`.`tbl_bb`,(SELECT a FROM ccc AS c, `dddd`));".getBytes(StandardCharsets.UTF_8);//20个token
79+
for (int i = 0; i < 50; i++) {
80+
System.out.print("Loop " + i + " : ");
81+
long cur = RunBench(src, parser);//by kaiz : 不加分析应该可以进2.6秒
82+
System.out.println(cur);
83+
if (cur < min || min == 0) {
84+
min = cur;
85+
}
86+
}
87+
System.out.print("min time : " + min);
88+
// parser.tokenize(src);
89+
// System.out.print("token count : "+parser.hashArray.getCount());
90+
}
91+
}

0 commit comments

Comments
 (0)