Skip to content

Commit 1250196

Browse files
authored
Merge pull request #66 from codefuse-ai/xxh_dev
[Feat]Add xml extractor source code
2 parents 711d834 + cd74b47 commit 1250196

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+2288
-0
lines changed

Diff for: language/xml/extractor/README.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Introduction
2+
The codefuse-query XML extractor transforms XML source files into standardized coref-xml data, which codefuse-query then uses for further analysis.
3+
4+
# Quick Start
5+
1. Set `JAVA_HOME`. Run `echo $JAVA_HOME` to check its current value; if the output is empty, it has not been configured yet.
6+
2. Build. Execute `mvn clean install`.
7+
3. Run. Execute `java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`.
8+
9+
After execution, a file named `coref_xml_src.db` is generated in the `./db` directory.

Diff for: language/xml/extractor/README_cn.md

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# 简介
2+
Codefuse-query XML 提取器将 XML 文件的源代码转换为标准化的 coref-xml 数据,这些数据用于 codefuse-query 进行进一步分析。
3+
4+
# 快速开始
5+
1. 设置 `JAVA_HOME`。执行 `echo $JAVA_HOME` 来显示当前的设置。如果显示为空,则表示尚未配置。
6+
2. 构建。执行 `mvn clean install`。
7+
3. 运行。执行 `java -jar target/xml-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`。
8+
9+
执行后,一个名为 coref_xml_src.db 的文件将生成在 ./db 目录下。
1.51 MB
Binary file not shown.

Diff for: language/xml/extractor/pom.xml

+170
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3+
<modelVersion>4.0.0</modelVersion>
4+
5+
<groupId>com.alipay.codequery</groupId>
6+
<artifactId>xml-extractor</artifactId>
7+
<version>1.0-SNAPSHOT</version>
8+
9+
<packaging>jar</packaging>
10+
11+
<name>xml-extractor</name>
12+
<url>http://maven.apache.org</url>
13+
14+
<properties>
15+
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
16+
</properties>
17+
18+
<dependencies>
19+
<dependency>
20+
<groupId>junit</groupId>
21+
<artifactId>junit</artifactId>
22+
<version>4.12</version>
23+
<scope>test</scope>
24+
</dependency>
25+
<!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
26+
<dependency>
27+
<groupId>org.apache.commons</groupId>
28+
<artifactId>commons-lang3</artifactId>
29+
<version>3.11</version>
30+
</dependency>
31+
32+
<!-- https://mvnrepository.com/artifact/stax/stax-api -->
33+
<dependency>
34+
<groupId>stax</groupId>
35+
<artifactId>stax-api</artifactId>
36+
<version>1.0.1</version>
37+
</dependency>
38+
39+
<dependency>
40+
<groupId>org.codehaus.woodstox</groupId>
41+
<artifactId>stax2-api</artifactId>
42+
<version>4.2</version>
43+
</dependency>
44+
45+
<dependency>
46+
<groupId>com.fasterxml.woodstox</groupId>
47+
<artifactId>woodstox-core</artifactId>
48+
<version>6.4.1-SNAPSHOT</version>
49+
<scope>system</scope>
50+
<systemPath>${project.basedir}/lib/woodstox-core-6.4.1-SNAPSHOT.jar</systemPath>
51+
</dependency>
52+
53+
<dependency>
54+
<groupId>org.projectlombok</groupId>
55+
<artifactId>lombok</artifactId>
56+
<version>1.18.16</version>
57+
<scope>provided</scope>
58+
</dependency>
59+
60+
<dependency>
61+
<groupId>org.xerial</groupId>
62+
<artifactId>sqlite-jdbc</artifactId>
63+
<version>3.36.0.2</version>
64+
</dependency>
65+
66+
<dependency>
67+
<groupId>org.mybatis</groupId>
68+
<artifactId>mybatis</artifactId>
69+
<version>3.5.6</version>
70+
</dependency>
71+
72+
<dependency>
73+
<groupId>tk.mybatis</groupId>
74+
<artifactId>mapper</artifactId>
75+
<!-- Using the latest version is recommended; look it up on the project home page -->
76+
<version>4.1.5</version>
77+
</dependency>
78+
79+
<!-- Log4j 2.14.1 is affected by CVE-2021-44228 (Log4Shell) and its follow-up CVEs;
     2.17.1 is a patched release that still runs on Java 8. Keep all three artifacts on the same version. -->
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-core</artifactId>
    <version>2.17.1</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-api</artifactId>
    <version>2.17.1</version>
</dependency>
<dependency>
    <groupId>org.apache.logging.log4j</groupId>
    <artifactId>log4j-slf4j-impl</artifactId>
    <version>2.17.1</version>
</dependency>
94+
95+
</dependencies>
96+
<build>
97+
<plugins>
98+
<plugin>
99+
<groupId>org.apache.maven.plugins</groupId>
100+
<artifactId>maven-compiler-plugin</artifactId>
101+
<configuration>
102+
<source>8</source>
103+
<target>8</target>
104+
</configuration>
105+
</plugin>
106+
<plugin>
107+
<groupId>org.apache.maven.plugins</groupId>
108+
<artifactId>maven-surefire-plugin</artifactId>
109+
<version>2.4.2</version>
110+
<configuration>
111+
<skipTests>true</skipTests>
112+
</configuration>
113+
</plugin>
114+
<plugin>
115+
<groupId>org.mybatis.generator</groupId>
116+
<artifactId>mybatis-generator-maven-plugin</artifactId>
117+
<version>1.3.7</version>
118+
<configuration>
119+
<verbose>true</verbose>
120+
<overwrite>true</overwrite>
121+
</configuration>
122+
<dependencies>
123+
<dependency>
124+
<groupId>org.xerial</groupId>
125+
<artifactId>sqlite-jdbc</artifactId>
126+
<version>3.36.0.2</version>
127+
</dependency>
128+
<dependency>
129+
<groupId>tk.mybatis</groupId>
130+
<artifactId>mapper</artifactId>
131+
<version>4.1.5</version>
132+
</dependency>
133+
</dependencies>
134+
<executions>
135+
<execution>
136+
<id>Generate MyBatis Artifacts</id>
137+
<goals>
138+
<goal>generate</goal>
139+
</goals>
140+
</execution>
141+
</executions>
142+
</plugin>
143+
<plugin>
144+
<groupId>org.apache.maven.plugins</groupId>
145+
<artifactId>maven-assembly-plugin</artifactId>
146+
<version>2.5.5</version>
147+
<configuration>
148+
<archive>
149+
<manifest>
150+
<mainClass>com.alipay.codequery.Extractor</mainClass>
151+
</manifest>
152+
</archive>
153+
<descriptorRefs>
154+
<descriptorRef>jar-with-dependencies</descriptorRef>
155+
</descriptorRefs>
156+
</configuration>
157+
<executions>
158+
<execution>
159+
<id>make-assembly</id>
160+
<phase>package</phase>
161+
<goals>
162+
<goal>single</goal>
163+
</goals>
164+
</execution>
165+
</executions>
166+
</plugin>
167+
168+
</plugins>
169+
</build>
170+
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
2+
package com.alipay.codequery;
3+
import com.alipay.codequery.stax.StaxCorefExtractor;
4+
import com.alipay.codequery.util.CorefStorage;
5+
import com.alipay.codequery.util.LoggerUtil;
6+
import org.apache.logging.log4j.Level;
7+
import org.apache.logging.log4j.LogManager;
8+
import org.apache.logging.log4j.Logger;
9+
10+
import javax.xml.stream.XMLStreamException;
11+
import java.io.File;
12+
import java.io.IOException;
13+
14+
public class Extractor {
15+
private static final Logger logger = LogManager.getLogger(Extractor.class);
16+
public static final String XML_EXT = ".xml";
17+
public static final String AXML_EXT = ".axml";
18+
public static final String[] FILE_EXT_ARRAY = {
19+
XML_EXT,
20+
AXML_EXT,
21+
};
22+
23+
public static void main(String[] args) throws IOException, XMLStreamException {
24+
LoggerUtil.initLogger(Level.INFO);
25+
26+
long start = System.currentTimeMillis();
27+
// repoDir和destDir是设置的本地测试目录,在生产中会被替换掉
28+
String repoDir = "";
29+
String destDir = "";
30+
if (args.length > 0) {
31+
repoDir = args[0];
32+
}
33+
if (args.length > 1) {
34+
destDir = args[1];
35+
}
36+
if (!destDir.endsWith(File.separator)) {
37+
destDir += File.separator;
38+
}
39+
CorefStorage corefStorage = new CorefStorage(destDir);
40+
File sourceDir = new File(repoDir);
41+
parse(sourceDir, sourceDir, corefStorage);
42+
logger.info("Time to completion (TTC): " + (System.currentTimeMillis() - start));
43+
}
44+
45+
private static void parse(File sourceDir, File rootDir, CorefStorage corefStorage) {
46+
File[] files = rootDir.listFiles();
47+
if (files == null) {
48+
return;
49+
}
50+
for (File file: files) {
51+
if (file.isDirectory()) {
52+
parse(sourceDir, file, corefStorage);
53+
} else {
54+
for (String fileExt: FILE_EXT_ARRAY) {
55+
if (file.getName().endsWith(fileExt)) {
56+
logger.info("Start Extracting xml file: {}", file.getAbsolutePath());
57+
try {
58+
StaxCorefExtractor extractor = new StaxCorefExtractor(file, corefStorage, sourceDir.getAbsolutePath());
59+
extractor.parse();
60+
} catch (Exception e) {
61+
logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath());
62+
}
63+
}
64+
}
65+
}
66+
}
67+
}
68+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
package com.alipay.codequery.dal.mybatis.domain;
2+
3+
import javax.persistence.*;
4+
5+
@Table(name = "xml_attribute")
6+
public class XmlAttribute {
7+
@Id
8+
private Integer id;
9+
10+
@Column(name = "element_id")
11+
private Integer elementId;
12+
13+
private String name;
14+
15+
private String value;
16+
17+
@Column(name = "index_order")
18+
private Integer indexOrder;
19+
20+
@Column(name = "location_id")
21+
private Integer locationId;
22+
23+
public XmlAttribute(Integer id, Integer elementId, String name, String value, Integer indexOrder, Integer locationId) {
24+
this.id = id;
25+
this.elementId = elementId;
26+
this.name = name;
27+
this.value = value;
28+
this.indexOrder = indexOrder;
29+
this.locationId = locationId;
30+
}
31+
32+
public XmlAttribute() {
33+
super();
34+
}
35+
36+
/**
37+
* @return id
38+
*/
39+
public Integer getId() {
40+
return id;
41+
}
42+
43+
/**
44+
* @param id
45+
*/
46+
public void setId(Integer id) {
47+
this.id = id;
48+
}
49+
50+
/**
51+
* @return element_id
52+
*/
53+
public Integer getElementId() {
54+
return elementId;
55+
}
56+
57+
/**
58+
* @param elementId
59+
*/
60+
public void setElementId(Integer elementId) {
61+
this.elementId = elementId;
62+
}
63+
64+
/**
65+
* @return name
66+
*/
67+
public String getName() {
68+
return name;
69+
}
70+
71+
/**
72+
* @param name
73+
*/
74+
public void setName(String name) {
75+
this.name = name == null ? null : name.trim();
76+
}
77+
78+
/**
79+
* @return value
80+
*/
81+
public String getValue() {
82+
return value;
83+
}
84+
85+
/**
86+
* @param value
87+
*/
88+
public void setValue(String value) {
89+
this.value = value == null ? null : value.trim();
90+
}
91+
92+
/**
93+
* @return index_order
94+
*/
95+
public Integer getIndexOrder() {
96+
return indexOrder;
97+
}
98+
99+
/**
100+
* @param indexOrder
101+
*/
102+
public void setIndexOrder(Integer indexOrder) {
103+
this.indexOrder = indexOrder;
104+
}
105+
106+
/**
107+
* @return location_id
108+
*/
109+
public Integer getLocationId() {
110+
return locationId;
111+
}
112+
113+
/**
114+
* @param locationId
115+
*/
116+
public void setLocationId(Integer locationId) {
117+
this.locationId = locationId;
118+
}
119+
}

0 commit comments

Comments
 (0)