Skip to content

Commit 0cf2f65

Browse files
authored
Merge pull request #53 from codefuse-ai/xxh_dev
Add COREF for java language extractor source code.
2 parents 952b99c + dbe1215 commit 0cf2f65

File tree

456 files changed

+49145
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

456 files changed

+49145
-0
lines changed

language/java/extractor/README.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Introduction
2+
The codefuse-query java extractor transforms the source code of Java projects into standardized coref-java data, which is utilized for further analysis by codefuse-query.
3+
4+
# Quick Start
5+
1. Set `JAVA_HOME`. Run `echo $JAVA_HOME` to check its current value; if the output is empty, `JAVA_HOME` has not been set yet.
6+
2. Build. Execute `mvn clean install`.
7+
3. Run. Execute `java -jar target/java-extractor-0.2.0-jar-with-dependencies.jar ${YOUR_JAVA_REPO} ./db` (the jar name follows the `<version>` declared in `pom.xml`).
8+
9+
After execution, a file named coref_java_src.db will be generated in the ./db directory.

language/java/extractor/README_cn.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# 介绍
2+
codefuse-query java extractor 将 java 项目的源码转化为 coref-java 标准化数据,用于codefuse-query的进一步分析。
3+
4+
# 快速开始
5+
1. 设置 JAVA_HOME。`echo $JAVA_HOME` 如果显示为空,则还没有设置好。
6+
2. 构建。 `mvn clean install`
7+
3. 运行。 `java -jar target/java-extractor-0.2.0-jar-with-dependencies.jar ${YOUR_JAVA_REPO} ./db`(jar 文件名中的版本号与 `pom.xml` 中的 `<version>` 保持一致)
8+
9+
执行完成后,会在 ./db 目录下生成 coref_java_src.db 文件。

language/java/extractor/pom.xml

+258
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<groupId>com.alipay.tool</groupId>
6+
<artifactId>java-extractor</artifactId>
7+
<version>0.2.0</version>
8+
9+
<packaging>jar</packaging>
10+
11+
<name>coref-java-src-extractor</name>
12+
<url>http://maven.apache.org</url>
13+
14+
<properties>
15+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16+
<kotlin.version.coref>1.5.21</kotlin.version.coref>
17+
</properties>
18+
<dependencies>
19+
<dependency>
20+
<groupId>org.mybatis</groupId>
21+
<artifactId>mybatis</artifactId>
22+
<version>3.5.7</version>
23+
</dependency>
24+
<dependency>
25+
<groupId>javax.annotation</groupId>
26+
<artifactId>javax.annotation-api</artifactId>
27+
<version>1.3.2</version>
28+
</dependency>
29+
<dependency>
30+
<groupId>org.mybatis.dynamic-sql</groupId>
31+
<artifactId>mybatis-dynamic-sql</artifactId>
32+
<version>1.3.0</version>
33+
</dependency>
34+
<dependency>
35+
<groupId>commons-codec</groupId>
36+
<artifactId>commons-codec</artifactId>
37+
<version>1.15</version>
38+
</dependency>
39+
<dependency>
40+
<groupId>com.ibm.icu</groupId>
41+
<artifactId>icu4j</artifactId>
42+
<version>59.1</version>
43+
</dependency>
44+
45+
<dependency>
46+
<groupId>org.apache.commons</groupId>
47+
<artifactId>commons-lang3</artifactId>
48+
<version>3.12.0</version>
49+
</dependency>
50+
51+
<dependency>
52+
<groupId>org.projectlombok</groupId>
53+
<artifactId>lombok</artifactId>
54+
<version>1.18.20</version>
55+
<scope>provided</scope>
56+
</dependency>
57+
<dependency>
58+
<groupId>me.tongfei</groupId>
59+
<artifactId>progressbar</artifactId>
60+
<version>0.9.2</version>
61+
</dependency>
62+
63+
<dependency>
64+
<groupId>org.jetbrains</groupId>
65+
<artifactId>annotations</artifactId>
66+
<version>22.0.0</version>
67+
</dependency>
68+
<dependency>
69+
<groupId>uk.com.robust-it</groupId>
70+
<artifactId>cloning</artifactId>
71+
<version>1.9.12</version>
72+
</dependency>
73+
74+
<dependency>
75+
<groupId>com.google.code.gson</groupId>
76+
<artifactId>gson</artifactId>
77+
<version>2.8.8</version>
78+
</dependency>
79+
<dependency>
80+
<groupId>com.google.guava</groupId>
81+
<artifactId>guava</artifactId>
82+
<version>30.1.1-jre</version>
83+
</dependency>
84+
<dependency>
85+
<groupId>org.hamcrest</groupId>
86+
<artifactId>hamcrest-all</artifactId>
87+
<version>1.3</version>
88+
</dependency>
89+
<dependency>
90+
<groupId>com.google.re2j</groupId>
91+
<artifactId>re2j</artifactId>
92+
<version>1.6</version>
93+
</dependency>
94+
<dependency>
95+
<groupId>org.jetbrains.kotlin</groupId>
96+
<artifactId>kotlin-compiler-embeddable</artifactId>
97+
<version>${kotlin.version.coref}</version>
98+
</dependency>
99+
<dependency>
100+
<groupId>org.jetbrains.kotlin</groupId>
101+
<artifactId>kotlin-reflect</artifactId>
102+
<version>${kotlin.version.coref}</version>
103+
</dependency>
104+
<dependency>
105+
<groupId>org.jetbrains.kotlin</groupId>
106+
<artifactId>kotlin-script-runtime</artifactId>
107+
<version>${kotlin.version.coref}</version>
108+
</dependency>
109+
<dependency>
110+
<groupId>org.jetbrains.kotlin</groupId>
111+
<artifactId>kotlin-stdlib</artifactId>
112+
<version>${kotlin.version.coref}</version>
113+
</dependency>
114+
115+
<dependency>
116+
<groupId>net.java.dev.jna</groupId>
117+
<artifactId>jna</artifactId>
118+
<version>4.1.0</version>
119+
</dependency>
120+
121+
<dependency>
122+
<groupId>org.xerial</groupId>
123+
<artifactId>sqlite-jdbc</artifactId>
124+
<version>3.36.0.2</version>
125+
</dependency>
126+
127+
<dependency>
128+
<groupId>tk.mybatis</groupId>
129+
<artifactId>mapper</artifactId>
130+
<version>4.1.5</version>
131+
</dependency>
132+
<dependency>
133+
<groupId>org.junit.jupiter</groupId>
134+
<artifactId>junit-jupiter</artifactId>
135+
<version>5.9.1</version>
136+
<scope>test</scope>
137+
</dependency>
138+
<dependency>
139+
<groupId>info.picocli</groupId>
140+
<artifactId>picocli</artifactId>
141+
<version>4.6.1</version>
142+
</dependency>
143+
<dependency>
144+
<groupId>org.apache.logging.log4j</groupId>
145+
<artifactId>log4j-core</artifactId>
146+
<version>2.14.1</version>
147+
</dependency>
148+
<dependency>
149+
<groupId>org.apache.logging.log4j</groupId>
150+
<artifactId>log4j-api</artifactId>
151+
<version>2.14.1</version>
152+
</dependency>
153+
<dependency>
154+
<groupId>org.apache.logging.log4j</groupId>
155+
<artifactId>log4j-slf4j-impl</artifactId>
156+
<version>2.14.1</version>
157+
</dependency>
158+
159+
<dependency>
160+
<groupId>commons-io</groupId>
161+
<artifactId>commons-io</artifactId>
162+
<version>2.8.0</version>
163+
</dependency>
164+
<dependency>
165+
<groupId>commons-collections</groupId>
166+
<artifactId>commons-collections</artifactId>
167+
<version>3.2.2</version>
168+
</dependency>
169+
<dependency>
170+
<groupId>com.aliyun.oss</groupId>
171+
<artifactId>aliyun-sdk-oss</artifactId>
172+
<version>3.10.2</version>
173+
</dependency>
174+
<dependency>
175+
<groupId>org.apache.commons</groupId>
176+
<artifactId>commons-compress</artifactId>
177+
<version>1.18</version>
178+
</dependency>
179+
<dependency>
180+
<groupId>com.alibaba</groupId>
181+
<artifactId>fastjson</artifactId>
182+
<version>1.2.72_noneautotype</version>
183+
</dependency>
184+
<dependency>
185+
<groupId>org.ini4j</groupId>
186+
<artifactId>ini4j</artifactId>
187+
<version>0.5.4</version>
188+
</dependency>
189+
</dependencies>
190+
191+
<build>
192+
<plugins>
193+
<plugin>
194+
<groupId>org.apache.maven.plugins</groupId>
195+
<artifactId>maven-compiler-plugin</artifactId>
196+
<version>3.8.1</version>
197+
<configuration>
198+
<source>8</source>
199+
<target>8</target>
200+
</configuration>
201+
</plugin>
202+
<plugin>
203+
<groupId>org.mybatis.generator</groupId>
204+
<artifactId>mybatis-generator-maven-plugin</artifactId>
205+
<version>1.4.0</version>
206+
<configuration>
207+
<verbose>false</verbose>
208+
<overwrite>true</overwrite>
209+
</configuration>
210+
<dependencies>
211+
<dependency>
212+
<groupId>org.xerial</groupId>
213+
<artifactId>sqlite-jdbc</artifactId>
214+
<version>3.36.0.1</version>
215+
</dependency>
216+
<dependency>
217+
<groupId>tk.mybatis</groupId>
218+
<artifactId>mapper</artifactId>
219+
<version>4.1.5</version>
220+
</dependency>
221+
</dependencies>
222+
<executions>
223+
<execution>
224+
<id>Generate MyBatis Artifacts</id>
225+
<goals>
226+
<goal>generate</goal>
227+
</goals>
228+
</execution>
229+
</executions>
230+
</plugin>
231+
<plugin>
232+
<groupId>org.apache.maven.plugins</groupId>
233+
<artifactId>maven-assembly-plugin</artifactId>
234+
<version>3.3.0</version>
235+
<configuration>
236+
<archive>
237+
<manifest>
238+
<mainClass>com.alipay.codequery.Extractor</mainClass>
239+
</manifest>
240+
</archive>
241+
<descriptorRefs>
242+
<descriptorRef>jar-with-dependencies</descriptorRef>
243+
</descriptorRefs>
244+
</configuration>
245+
<executions>
246+
<execution>
247+
<id>make-assembly</id>
248+
<phase>package</phase>
249+
<goals>
250+
<goal>single</goal>
251+
</goals>
252+
</execution>
253+
</executions>
254+
</plugin>
255+
</plugins>
256+
</build>
257+
</project>
258+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
package com.alipay.codequery;
2+
3+
import com.alipay.codequery.project.ProjectUtil;
4+
import com.alipay.codequery.util.PathUtil;
5+
import lombok.Getter;
6+
import org.jetbrains.annotations.NotNull;
7+
8+
import java.io.File;
9+
import java.util.*;
10+
11+
12+
public class Configuration implements Cloneable {
13+
14+
public final String commitId = "init";
15+
public final String repository = "Not Specified";
16+
public List<String> sourcepath = new ArrayList<>();
17+
public String javaHome = null;
18+
public List<String> classpath = new ArrayList<>();
19+
20+
@Getter
21+
private final @NotNull Collection<File> kotlinFiles = new HashSet<>();
22+
@Getter
23+
private final @NotNull Collection<File> javaFiles = new HashSet<>();
24+
@Getter
25+
private final @NotNull Set<File> javaDirs = new HashSet<>();
26+
@Getter
27+
private final List<File> classFiles = new ArrayList<>();
28+
29+
/**
30+
* MENTION: we allow multiple source paths exist but we ONLY use the first one as root
31+
*/
32+
public String getSourcePath() {
33+
return sourcepath.get(0);
34+
}
35+
36+
public void prepareFile() {
37+
clearExistedFiles();
38+
39+
// jar class path
40+
ProjectUtil.getClassPaths(this.classpath).stream().map(File::new).forEach(classFiles::add);
41+
42+
// source file path
43+
for (String sourceRoot : sourcepath) {
44+
File file = new File(sourceRoot).getAbsoluteFile();
45+
withJavaSrc(file);
46+
withKotlinSrc(file);
47+
}
48+
}
49+
50+
public void withKotlinSrc(File root) {
51+
ArrayList<File> results = new ArrayList<>();
52+
53+
PathUtil.TraverseBuilder traversal = new PathUtil.TraverseBuilder()
54+
.withSymbol(false)
55+
.withDirInResult(false)
56+
.withSuffix(".kt");
57+
traversal.traverse(root, results);
58+
59+
kotlinFiles.addAll(results);
60+
}
61+
62+
63+
public void withJavaSrc(File root) {
64+
ArrayList<File> results = new ArrayList<>();
65+
PathUtil.TraverseBuilder traversal = new PathUtil.TraverseBuilder()
66+
.withSymbol(false)
67+
.withDirInResult(true)
68+
.withSuffix(".java");
69+
traversal.traverse(root, results);
70+
71+
for (File file : results) {
72+
if (file.isDirectory()) {
73+
javaDirs.add(file);
74+
} else {
75+
javaFiles.add(file);
76+
}
77+
}
78+
}
79+
80+
private void clearExistedFiles() {
81+
javaFiles.clear();
82+
javaDirs.clear();
83+
kotlinFiles.clear();
84+
classFiles.clear();
85+
}
86+
87+
}

0 commit comments

Comments
 (0)