Skip to content

Commit bd4d156

Browse files
Merge pull request #310 from jctoledo/pdb-r3
PDB R3 Bio2RDF Rdfizer
2 parents 588e4fe + 0fbbff1 commit bd4d156

File tree

106 files changed

+13638
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+13638
-0
lines changed

pdb/.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
*target*
2+
*.jar
3+
*.war
4+
*.ear
5+
*.class

pdb/.project

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>pdb2rdf-R3</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.eclipse.m2e.core.maven2Builder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
</buildSpec>
14+
<natures>
15+
<nature>org.eclipse.m2e.core.maven2Nature</nature>
16+
</natures>
17+
</projectDescription>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
activeProfiles=
2+
eclipse.preferences.version=1
3+
resolveWorkspaceProjects=true
4+
version=1

pdb/README.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# PDB Bio2RDF parser
2+
This is the Bio2RDF parser for the PDB dataset. The files used for the conversion are the PDBML files.
3+
4+
## Requirements
5+
Make sure that the following software is installed and available on your user's PATH:
6+
1. Sun Java JDK 1.6 or above+
7+
2. Apache Maven 2.2.0 or above+
8+
9+
## Building
10+
Run the following command to build the project:
11+
12+
mvn clean install
13+
14+
## Downloading source files
15+
This PDB RDFizer converts PDBML files to Bio2RDF R3 compliant linked data. It can be run on the entire PDB dataset, which must first be downloaded. To download and mirror the entire set of PDB files, download and run this [rsync script](https://gist.github.com/jctoledo/6426686).
16+
17+
## Contents
18+
19+
* **pdb2rdf-cli**:
20+
The command line interface for this parser. Once you have installed the software, go to `pdb2rdf-cli/target` and extract `pdb2rdf-cli-2.0.0-bin.zip`. The archive contains `pdb2rdf.sh`, an executable shell script that runs this RDFizer. Here are some example invocations:
21+
22+
1. Print the help:
23+
./pdb2rdf.sh -help
24+
2. Convert one PDB record given its id and store the output in /tmp/output/:
25+
./pdb2rdf.sh -out /tmp/output
26+
3. Convert all PDB entries found in a given directory:
27+
./pdb2rdf.sh -dir /path/to/pdbml/files -out /path/to/outputdir
28+
4. Generate the output of this RDFizer as N-Quads
29+
./pdb2rdf.sh -dir /path/to/pdbml/files -out /path/to/outputdir -format NQUADS;
30+
31+
* **pdb2rdf-parser**:
32+
This module holds the set of classes that convert the PDBML file format to Bio2RDF compliant RDF.
33+
34+
* **pdb2rdf-cluster**:
35+
The cluster edition of this parser. Use this module if you have multiple computers that can be used to simultaneously convert PDBML files into RDF. Once you have installed the software, go to `pdb2rdf-cluster/target` and extract `pdb2rdf-cluster-2.0.0-bin.zip`. The archive contains `run.sh`, an executable shell script that starts a PDBML file server to which other `pdb2rdf.sh` instances can connect. Here are some example invocations:
36+
37+
1. Print the help:
38+
./run.sh
39+
2. Set the directory where the gzipped PDBML files are located and set the listening port to 8123
40+
./run.sh -dir /path/to/local/pdbml/directory -gzip -port 8123
41+
42+
Once the server is initialized, `pdb2rdf.sh` clients can connect to it in the following manner:
43+
44+
./pdb2rdf.sh -cluster 192.168.1.123:8123 -out /tmp/output/directory
45+
46+
47+

pdb/pdb2rdf-cli/.classpath

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<classpath>
3+
<classpathentry kind="src" output="target/classes" path="src/main/java">
4+
<attributes>
5+
<attribute name="optional" value="true"/>
6+
<attribute name="maven.pomderived" value="true"/>
7+
</attributes>
8+
</classpathentry>
9+
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
10+
<attributes>
11+
<attribute name="maven.pomderived" value="true"/>
12+
</attributes>
13+
</classpathentry>
14+
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
15+
<attributes>
16+
<attribute name="optional" value="true"/>
17+
<attribute name="maven.pomderived" value="true"/>
18+
</attributes>
19+
</classpathentry>
20+
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
21+
<attributes>
22+
<attribute name="maven.pomderived" value="true"/>
23+
</attributes>
24+
</classpathentry>
25+
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6">
26+
<attributes>
27+
<attribute name="maven.pomderived" value="true"/>
28+
</attributes>
29+
</classpathentry>
30+
<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER"/>
31+
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
32+
<attributes>
33+
<attribute name="maven.pomderived" value="true"/>
34+
</attributes>
35+
</classpathentry>
36+
<classpathentry kind="output" path="target/classes"/>
37+
</classpath>

pdb/pdb2rdf-cli/.project

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<projectDescription>
3+
<name>pdb2rdf-cli</name>
4+
<comment></comment>
5+
<projects>
6+
</projects>
7+
<buildSpec>
8+
<buildCommand>
9+
<name>org.eclipse.jdt.core.javabuilder</name>
10+
<arguments>
11+
</arguments>
12+
</buildCommand>
13+
<buildCommand>
14+
<name>org.maven.ide.eclipse.maven2Builder</name>
15+
<arguments>
16+
</arguments>
17+
</buildCommand>
18+
<buildCommand>
19+
<name>org.eclipse.m2e.core.maven2Builder</name>
20+
<arguments>
21+
</arguments>
22+
</buildCommand>
23+
</buildSpec>
24+
<natures>
25+
<nature>org.eclipse.m2e.core.maven2Nature</nature>
26+
<nature>org.eclipse.jdt.core.javanature</nature>
27+
<nature>org.maven.ide.eclipse.maven2Nature</nature>
28+
</natures>
29+
</projectDescription>
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
#Wed May 02 12:54:53 EDT 2012
2+
eclipse.preferences.version=1
3+
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
4+
org.eclipse.jdt.core.compiler.compliance=1.6
5+
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
6+
org.eclipse.jdt.core.compiler.source=1.6
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
activeProfiles=
2+
eclipse.preferences.version=1
3+
resolveWorkspaceProjects=true
4+
version=1
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
#Wed May 02 12:54:52 EDT 2012
2+
activeProfiles=
3+
eclipse.preferences.version=1
4+
fullBuildGoals=process-test-resources
5+
resolveWorkspaceProjects=true
6+
resourceFilterGoals=process-resources resources\:testResources
7+
skipCompilerPlugin=true
8+
version=1

pdb/pdb2rdf-cli/log4j.properties

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Print log4j's own configuration diagnostics. The key is case-sensitive:
# log4j only reads "log4j.debug". Set to false to silence these messages.
log4j.debug=true
2+
3+
### direct log messages to stdout ###
4+
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
5+
log4j.appender.stdout.Target=System.out
6+
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
7+
log4j.appender.stdout.layout.ConversionPattern=%d{ABSOLUTE} %5p %c{1}:%L - %m%n
8+
9+
10+
### Appender for unknown residues log
11+
log4j.appender.ur=org.apache.log4j.FileAppender
12+
log4j.appender.ur.File=unknown_residues.log
13+
log4j.appender.ur.layout=org.apache.log4j.SimpleLayout
14+
15+
log4j.rootLogger=warn, stdout
16+
log4j.logger.com.dumontierlab=info
17+
18+
log4j.logger.unknownresidues=warn, ur

pdb/pdb2rdf-cli/pdb2rdf.bat

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
@ECHO OFF
2+
3+
setlocal enabledelayedexpansion
4+
5+
set CLASSPATH=
6+
for /f %%i in ( 'dir /b lib\*.jar' ) do set CLASSPATH=!CLASSPATH!lib\%%i;
7+
8+
9+
10+
11+
REM Set the maximum RAM used by this program
12+
13+
set MEMORY=2g
14+
15+
REM set OPTS=-ea -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=8010
16+
set OPTS=-ea -Dlog4j.configuration=file:log4j.properties
17+
18+
19+
java -Xmx%MEMORY% -cp %CLASSPATH% %OPTS% com.dumontierlab.pdb2rdf.Pdb2Rdf %*

pdb/pdb2rdf-cli/pdb2rdf.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#!/bin/sh
2+
CLASSPATH=""
3+
# Iterate over the glob directly instead of parsing `ls` output, which would split jar names on whitespace.
for i in lib/*.jar;
4+
do
5+
CLASSPATH="$CLASSPATH$i:"
6+
done
7+
8+
#Set the maximum RAM used by this program
9+
MEMORY=4g
10+
11+
#OPTS="-ea -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=8010"
12+
OPTS="-ea -Dlog4j.configuration=file:log4j.properties"
13+
14+
# Quote the classpath and "$@" so jar paths and user arguments containing spaces reach the JVM intact.
# $OPTS stays unquoted on purpose: it must split into separate JVM flags.
java -Xmx$MEMORY -cp "$CLASSPATH" $OPTS com.dumontierlab.pdb2rdf.Pdb2Rdf "$@"

pdb/pdb2rdf-cli/pom.xml

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3+
<parent>
4+
<artifactId>pdb2rdf</artifactId>
5+
<groupId>com.dumontierlab</groupId>
6+
<version>2.0.0</version>
7+
</parent>
8+
<modelVersion>4.0.0</modelVersion>
9+
<artifactId>pdb2rdf-cli</artifactId>
10+
<name>PDB Bio2RDF CLI</name>
11+
<dependencies>
12+
<dependency>
13+
<groupId>com.dumontierlab</groupId>
14+
<artifactId>pdb2rdf-parser</artifactId>
15+
<version>${project.version}</version>
16+
</dependency>
17+
<dependency>
18+
<groupId>com.openlinksw</groupId>
19+
<artifactId>virtuoso-jdbc3</artifactId>
20+
</dependency>
21+
<dependency>
22+
<groupId>com.openlinksw</groupId>
23+
<artifactId>virtuoso-jena</artifactId>
24+
</dependency>
25+
<dependency>
26+
<groupId>commons-cli</groupId>
27+
<artifactId>commons-cli</artifactId>
28+
</dependency>
29+
<dependency>
30+
<groupId>jline</groupId>
31+
<artifactId>jline</artifactId>
32+
</dependency>
33+
<dependency>
34+
<groupId>org.apache.jena</groupId>
35+
<artifactId>jena-arq</artifactId>
36+
</dependency>
37+
<dependency>
38+
<groupId>commons-io</groupId>
39+
<artifactId>commons-io</artifactId>
40+
</dependency>
41+
<dependency>
42+
<groupId>junit</groupId>
43+
<artifactId>junit</artifactId>
44+
<scope>test</scope>
45+
</dependency>
46+
</dependencies>
47+
48+
<build>
49+
<plugins>
50+
<plugin>
51+
<groupId>org.apache.maven.plugins</groupId>
52+
<artifactId>maven-compiler-plugin</artifactId>
53+
<configuration>
54+
<source>1.6</source>
55+
<target>1.6</target>
56+
</configuration>
57+
</plugin>
58+
<plugin>
59+
<artifactId>maven-assembly-plugin</artifactId>
60+
<configuration>
61+
<descriptors>
62+
<descriptor>src/main/assembly/assembly.xml</descriptor>
63+
</descriptors>
64+
</configuration>
65+
<executions>
66+
<execution>
67+
<id>make-assembly</id> <!-- this is used for inheritance merges -->
68+
<phase>package</phase> <!-- append to the packaging phase. -->
69+
<goals>
70+
<goal>single</goal> <!-- goals == mojos -->
71+
</goals>
72+
</execution>
73+
</executions>
74+
</plugin>
75+
<plugin>
76+
<artifactId>maven-assembly-plugin</artifactId>
77+
<configuration>
78+
<descriptorRefs>
79+
<descriptorRef>jar-with-dependencies</descriptorRef>
80+
</descriptorRefs>
81+
</configuration>
82+
</plugin>
83+
<plugin>
84+
<groupId>org.apache.maven.plugins</groupId>
85+
<artifactId>maven-source-plugin</artifactId>
86+
<version>2.0.4</version>
87+
<executions>
88+
<execution>
89+
<phase>package</phase>
90+
<goals>
91+
<goal>jar</goal>
92+
</goals>
93+
</execution>
94+
</executions>
95+
</plugin>
96+
</plugins>
97+
</build>
98+
</project>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<assembly>
2+
<id>bin</id>
3+
<formats>
4+
<format>tar.gz</format>
5+
<format>zip</format>
6+
</formats>
7+
<fileSets>
8+
<fileSet>
9+
<includes>
10+
<include>README*</include>
11+
<include>LICENSE*</include>
12+
<include>NOTICE*</include>
13+
<include>*.properties</include>
14+
</includes>
15+
</fileSet>
16+
<fileSet>
17+
<fileMode>0755</fileMode>
18+
<includes>
19+
<include>pdb2rdf.*</include>
20+
</includes>
21+
</fileSet>
22+
</fileSets>
23+
<dependencySets>
24+
<dependencySet>
25+
<outputDirectory>/lib</outputDirectory>
26+
<useProjectArtifact>true</useProjectArtifact>
27+
<scope>runtime</scope>
28+
</dependencySet>
29+
</dependencySets>
30+
</assembly>

0 commit comments

Comments
 (0)