Skip to content

Commit a59f66a

Browse files
authored
Merge pull request #68 from codefuse-ai/xxh_dev
[Feat]Add properties extractor source code and library files
2 parents 1250196 + d16876d commit a59f66a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+4872
-0
lines changed
+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Introduction
2+
The codefuse-query properties extractor transforms the source code of properties files into standardized coref-properties data, which is utilized for further analysis by codefuse-query.
3+
4+
# Quick Start
5+
1. Set `JAVA_HOME`. Execute `echo $JAVA_HOME` to display its current setting. If it displays as empty, then it has not been configured yet.
6+
2. Build. Execute `mvn clean install`.
7+
3. Run. Execute `java -jar target/properties-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db`.
8+
9+
After execution, a file named coref_properties_src.db will be generated in the ./db directory.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# 简介
2+
Codefuse-query Properties 提取器将 Properties 文件的源代码转换为标准化的 coref-properties 数据,这些数据用于 codefuse-query 进行进一步分析。
3+
4+
# 快速开始
5+
1. 设置 JAVA_HOME。执行 echo $JAVA_HOME 来显示当前的设置。如果显示为空,则表示尚未配置。
6+
2. 构建。执行 mvn clean install。
7+
3. 运行。执行 java -jar target/properties-extractor-1.0-SNAPSHOT-jar-with-dependencies.jar ${YOUR_REPO} ./db。
8+
9+
执行后,一个名为 coref_properties_src.db 的文件将生成在 ./db 目录下。

language/properties/extractor/pom.xml

+159
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
<!-- Maven build for the codefuse-query properties extractor: produces a runnable
     jar-with-dependencies whose entry point is com.alipay.codequery.properties.Extractor. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.alipay.codequery.properties</groupId>
  <artifactId>properties-extractor</artifactId>
  <version>1.0-SNAPSHOT</version>

  <packaging>jar</packaging>

  <name>properties-extractor</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Single source of truth for all log4j artifacts so they can never drift apart.
         2.14.1 is affected by CVE-2021-44228 (Log4Shell); 2.17.1 is a patched release
         that still supports Java 8. -->
    <log4j.version>2.17.1</log4j.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.12</version>
      <scope>test</scope>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.commons/commons-lang3 -->
    <dependency>
      <groupId>org.apache.commons</groupId>
      <artifactId>commons-lang3</artifactId>
      <version>3.12.0</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/commons-codec/commons-codec -->
    <dependency>
      <groupId>commons-codec</groupId>
      <artifactId>commons-codec</artifactId>
      <version>1.15</version>
    </dependency>
    <dependency>
      <groupId>org.projectlombok</groupId>
      <artifactId>lombok</artifactId>
      <version>1.18.16</version>
      <scope>provided</scope>
    </dependency>

    <dependency>
      <groupId>org.xerial</groupId>
      <artifactId>sqlite-jdbc</artifactId>
      <version>3.36.0.2</version>
    </dependency>

    <dependency>
      <groupId>org.mybatis</groupId>
      <artifactId>mybatis</artifactId>
      <version>3.5.7</version>
    </dependency>

    <dependency>
      <groupId>tk.mybatis</groupId>
      <artifactId>mapper</artifactId>
      <!-- Using the latest version is recommended; look it up on the project's home page. -->
      <version>4.1.5</version>
    </dependency>

    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-core</artifactId>
      <version>${log4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-api</artifactId>
      <version>${log4j.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.logging.log4j</groupId>
      <artifactId>log4j-slf4j-impl</artifactId>
      <version>${log4j.version}</version>
    </dependency>
    <dependency>
      <groupId>info.picocli</groupId>
      <artifactId>picocli</artifactId>
      <version>4.6.1</version>
    </dependency>

  </dependencies>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <!-- Pinned so the build does not depend on whichever default the local Maven ships. -->
        <version>3.8.1</version>
        <configuration>
          <source>8</source>
          <target>8</target>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-surefire-plugin</artifactId>
        <version>2.4.2</version>
        <configuration>
          <!-- Tests are compiled but not executed during the build. -->
          <skipTests>true</skipTests>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.mybatis.generator</groupId>
        <artifactId>mybatis-generator-maven-plugin</artifactId>
        <version>1.3.7</version>
        <configuration>
          <verbose>true</verbose>
          <overwrite>true</overwrite>
        </configuration>
        <!-- The generator runs in its own classpath, so it needs the JDBC driver and
             the tk.mybatis mapper declared here as well. -->
        <dependencies>
          <dependency>
            <groupId>org.xerial</groupId>
            <artifactId>sqlite-jdbc</artifactId>
            <version>3.36.0.2</version>
          </dependency>
          <dependency>
            <groupId>tk.mybatis</groupId>
            <artifactId>mapper</artifactId>
            <version>4.1.5</version>
          </dependency>
        </dependencies>
        <executions>
          <execution>
            <id>Generate MyBatis Artifacts</id>
            <goals>
              <goal>generate</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>2.5.5</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.alipay.codequery.properties.Extractor</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <!-- Build the fat jar as part of the package phase. -->
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>

    </plugins>
  </build>
</project>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
2+
package com.alipay.codequery.properties;
3+
4+
import com.alipay.codequery.properties.core.CorefExtractor;
5+
import com.alipay.codequery.properties.model.Folder;
6+
import com.alipay.codequery.properties.model.Node;
7+
import com.alipay.codequery.properties.model.Program;
8+
import com.alipay.codequery.properties.storage.CorefStorage;
9+
import com.alipay.codequery.properties.core.CorefURI;
10+
import com.alipay.codequery.properties.util.LoggerUtil;
11+
import lombok.extern.slf4j.Slf4j;
12+
import org.apache.commons.lang3.StringUtils;
13+
import org.apache.logging.log4j.Level;
14+
import org.apache.logging.log4j.LogManager;
15+
import org.apache.logging.log4j.Logger;
16+
import picocli.CommandLine;
17+
import picocli.CommandLine.Command;
18+
import picocli.CommandLine.Parameters;
19+
20+
import java.io.*;
21+
import java.util.concurrent.Callable;
22+
23+
24+
@Command(name = "extract", mixinStandardHelpOptions = true, version = "extract 1.0",
25+
description = "extract COREF-Properties db from a src directory.")
26+
@Slf4j
27+
public class Extractor implements Callable<Integer> {
28+
29+
private static final Logger logger = LogManager.getLogger(Extractor.class);
30+
@Parameters(index = "0", description = "The source directory to extract.")
31+
private File srcRootDir;
32+
33+
@Parameters(index = "1", description = "The output directory for the DB file.")
34+
private File dbDir;
35+
36+
@CommandLine.Option(names = {"--corpus"}, description = "Specify the corpus of the codebase.")
37+
private String corpus = "";
38+
39+
/**
40+
*
41+
* main method.
42+
*/
43+
public static void main(String[] args) {
44+
int exitCode = new CommandLine(new Extractor()).execute(args);
45+
System.exit(exitCode);
46+
}
47+
48+
private void parse(File rootDir, CorefStorage corefStorage, CorefURI corefURI) throws IOException{
49+
File[] files = rootDir.listFiles();
50+
for (File file : files) {
51+
if (file.isDirectory()) {
52+
parse(file, corefStorage, corefURI);
53+
} else {
54+
55+
// Support extracting file's extension is 'properties' or 'properties.vm'.
56+
if (file.getName().endsWith(".properties") || file.getName().endsWith(".properties.vm")) {
57+
logger.info("Start Extracting properties file: {}", file.getAbsolutePath());
58+
try {
59+
CorefExtractor extractor = new CorefExtractor(file, corefStorage, srcRootDir.getAbsolutePath(), corefURI);
60+
extractor.parse();
61+
} catch (Exception e) {
62+
logger.error("Extraction failed, error message:{} on file {}", e.getMessage(), file.getAbsolutePath());
63+
}
64+
}
65+
}
66+
}
67+
}
68+
69+
private Program createProgramNode(String repoDir, CorefStorage corefStorage, CorefURI corefURI) {
70+
Program program = new Program();
71+
program.oid = corefURI.generateCorpusOid();
72+
program.prefix = repoDir;
73+
corefStorage.storeProgram(program.extractProgram());
74+
return program;
75+
}
76+
77+
private void visitDirectory(String repoDir, Node parent, CorefStorage corefStorage, CorefURI corefURI) {
78+
File file = new File(repoDir);
79+
80+
// Ignore the folder starts with "."
81+
if (file.isDirectory() && !(file.getName().startsWith("."))) {
82+
String absolutePath = file.getAbsolutePath();
83+
84+
Folder parentFolder = new Folder();
85+
parentFolder.name = file.getName();
86+
parentFolder.parent = parent;
87+
88+
// Calculate the relative path of the folder.
89+
if (absolutePath.endsWith(srcRootDir.getAbsolutePath())) {
90+
parentFolder.relativePath = "ROOT";
91+
} else {
92+
char head = repoDir.charAt(0);
93+
switch (head) {
94+
case '/':
95+
parentFolder.relativePath = absolutePath.substring(srcRootDir.getAbsolutePath().length() + 1);
96+
break;
97+
case '.':
98+
parentFolder.relativePath = absolutePath.substring(absolutePath.indexOf(repoDir) + 2);
99+
break;
100+
default:
101+
parentFolder.relativePath = absolutePath.substring(absolutePath.indexOf(repoDir));
102+
}
103+
}
104+
corefURI.setPath(parentFolder.relativePath);
105+
parentFolder.oid = corefURI.generateFileOid();
106+
corefStorage.storeFolder(parentFolder.extractFolder());
107+
108+
// Recursively visit the sub folders.
109+
for (File f : file.listFiles()) {
110+
if (f.isDirectory()) {
111+
visitDirectory(f.getAbsolutePath(), parentFolder, corefStorage, corefURI);
112+
} else if (f.getName().endsWith(".properties")) {
113+
CorefURI.fileMap.put(f.getAbsolutePath(), parentFolder.oid);
114+
}
115+
}
116+
}
117+
}
118+
119+
/**
120+
* Override the call method.
121+
* @return
122+
* @throws Exception
123+
*/
124+
@Override
125+
public Integer call() throws Exception {
126+
LoggerUtil.initLogger(Level.INFO);
127+
128+
long start = System.currentTimeMillis();
129+
CorefStorage corefStorage = new CorefStorage(dbDir.getAbsolutePath());
130+
CorefURI corefURI = StringUtils.isBlank(corpus) ? new CorefURI(srcRootDir.getAbsolutePath()) : new CorefURI(corpus);
131+
Program program = createProgramNode(srcRootDir.getAbsolutePath(), corefStorage, corefURI);
132+
visitDirectory(srcRootDir.getAbsolutePath(), program, corefStorage, corefURI);
133+
134+
parse(srcRootDir, corefStorage, corefURI);
135+
corefStorage.store();
136+
137+
logger.info("Time to completion (TTC): " + (System.currentTimeMillis() - start));
138+
139+
return 0;
140+
}
141+
}

0 commit comments

Comments
 (0)