1
1
<?php
2
2
/**
3
- Copyright (C) 2012 Michel Dumontier
3
+ Copyright (C) 2013 Michel Dumontier, Alison Callahan
4
4
5
5
Permission is hereby granted, free of charge, to any person obtaining a copy of
6
6
this software and associated documentation files (the "Software"), to deal in
20
20
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
21
SOFTWARE.
22
22
*/
23
-
24
- require ('../../php-lib/biopax2bio2rdf.php ' );
25
-
26
23
/**
27
24
* BioModels RDFizer
28
- * @version 1 .0
25
+ * @version 2 .0
29
26
* @author Michel Dumontier
27
+ * @author Alison Callahan
30
28
* @description http://www.ebi.ac.uk/biomodels-main/
31
29
*/
32
- class BiomodelsParser extends RDFFactory
30
+
31
+ require_once (__DIR__ .'/../../php-lib/biopax2bio2rdf.php ' );
32
+
33
+ class BiomodelsParser extends Bio2RDFizer
33
34
{
34
- private $ version = null ;
35
35
36
36
function __construct ($ argv ) {
37
- parent ::__construct ();
38
- $ this ->SetDefaultNamespace ("biomodels " );
37
+ parent ::__construct ($ argv , "biomodels " );
39
38
40
39
// set and print application parameters
41
- $ this ->AddParameter ('files ' ,true ,null ,'all|curated|biomodel#|start#-end# ' ,'entries to process: comma-separated list or hyphen-separated range ' );
42
- $ this ->AddParameter ('indir ' ,false ,null ,'/data/download/ ' .$ this ->GetNamespace ().'/ ' ,'directory to download into and parse from ' );
43
- $ this ->AddParameter ('outdir ' ,false ,null ,'/data/rdf/ ' .$ this ->GetNamespace ().'/ ' ,'directory to place rdfized files ' );
44
- $ this ->AddParameter ('graph_uri ' ,false ,null ,null ,'provide the graph uri to generate n-quads instead of n-triples ' );
45
- $ this ->AddParameter ('gzip ' ,false ,'true|false ' ,'true ' ,'gzip the output ' );
46
- $ this ->AddParameter ('download ' ,false ,'true|false ' ,'false ' ,'set true to download files ' );
47
- $ this ->AddParameter ('download_url ' ,false ,null ,'http://www.ebi.ac.uk/biomodels/models-main/publ/ ' );
48
- if ($ this ->SetParameters ($ argv ) == FALSE ) {
49
- $ this ->PrintParameters ($ argv );
50
- exit ;
51
- }
52
- if ($ this ->CreateDirectory ($ this ->GetParameterValue ('indir ' )) === FALSE ) exit ;
53
- if ($ this ->CreateDirectory ($ this ->GetParameterValue ('outdir ' )) === FALSE ) exit ;
54
- if ($ this ->GetParameterValue ('graph_uri ' )) $ this ->SetGraphURI ($ this ->GetParameterValue ('graph_uri ' ));
55
-
56
- return TRUE ;
40
+ parent ::addParameter ('files ' ,true ,null ,'all|curated|biomodel#|start#-end# ' ,'entries to process: comma-separated list or hyphen-separated range ' );
41
+ parent ::addParameter ('download_url ' ,false ,null ,'http://www.ebi.ac.uk/biomodels/models-main/publ/ ' );
42
+ parent ::initialize ();
57
43
}
58
44
59
45
function Run ()
60
46
{
61
47
// directory shortcuts
62
- $ ldir = $ this -> GetParameterValue ('indir ' );
63
- $ odir = $ this -> GetParameterValue ('outdir ' );
48
+ $ ldir = parent :: getParameterValue ('indir ' );
49
+ $ odir = parent :: getParameterValue ('outdir ' );
64
50
65
51
// get the work specified
66
- $ list = trim ($ this -> GetParameterValue ('files ' ));
52
+ $ list = trim (parent :: getParameterValue ('files ' ));
67
53
if ($ list == 'all ' ) {
68
54
// call the getAllModelsId webservice
69
55
$ file = $ ldir ."all_models.json " ;
@@ -108,29 +94,33 @@ function Run()
108
94
}
109
95
}
110
96
}
97
+
98
+ $ graph_uri = parent ::getGraphURI ();
99
+ if (parent ::getParameterValue ('dataset_graph ' ) == true ) parent ::setGraphURI (parent ::getDatasetURI ());
111
100
112
101
// set the write file
113
- $ outfile = 'biomodels.nt ' ; $ gz =false ;
114
- if ($ this ->GetParameterValue ('graph_uri ' )) {$ outfile = 'biomodels.nq ' ;}
115
- if ($ this ->GetParameterValue ('gzip ' )) {
116
- $ outfile .= '.gz ' ;
102
+ $ suffix = parent ::getParameterValue ('output_format ' );
103
+ $ outfile = 'biomodels ' .'. ' .$ suffix ;
104
+ $ gz =false ;
105
+
106
+ if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) {
117
107
$ gz = true ;
118
108
}
119
- $ bio2rdf_download_files [] = $ this ->GetBio2RDFDownloadURL ($ this ->GetNamespace ()).$ outfile ;
120
109
121
- $ this ->SetWriteFile ($ odir .$ outfile , $ gz );
110
+ $ dataset_description = '' ;
111
+ parent ::setWriteFile ($ odir .$ outfile , $ gz );
122
112
123
113
// iterate over the entries
124
114
$ i = 0 ;
125
115
$ total = count ($ entries );
126
116
foreach ($ entries AS $ id ) {
127
117
echo "processing " .(++$ i )." of $ total - biomodel# " .$ id ;
128
118
$ download_file = $ ldir .$ id .".owl.gz " ;
119
+ $ url = parent ::getParameterValue ('download_url ' )."$ id/ $ id-biopax3.owl " ;
129
120
// download if the file doesn't exist or we are told to
130
121
if (!file_exists ($ download_file ) || $ this ->GetParameterValue ('download ' ) == 'true ' ) {
131
122
// download
132
123
echo " - downloading " ;
133
- $ url = $ this ->GetParameterValue ('download_url ' )."$ id/ $ id-biopax3.owl " ;
134
124
$ buf = file_get_contents ($ url );
135
125
if (strlen ($ buf ) != 0 ) {
136
126
file_put_contents ("compress.zlib:// " .$ download_file , $ buf );
@@ -139,52 +129,76 @@ function Run()
139
129
}
140
130
141
131
// load entry, parse and write to file
142
- echo " - parsing " ;
132
+ echo " - parsing... " ;
143
133
// $this->SetReadFile($download_file,true);
144
134
$ buf = file_get_contents ("compress.zlib:// " .$ download_file );
145
-
146
- $ converter = new BioPAX2Bio2RDF ();
135
+
136
+ $ converter = new BioPAX2Bio2RDF ($ this -> getRegistry () );
147
137
$ converter ->SetBuffer ($ buf )
148
138
->SetBioPAXVersion (3 )
149
139
->SetBaseNamespace ("http://identifiers.org/biomodels.db/ $ id/ " )
150
140
->SetBio2RDFNamespace ("http://bio2rdf.org/biomodels: " .$ id ."_ " )
151
141
->SetDatasetURI ($ this ->GetDatasetURI ());
152
- $ this ->AddRDF ($ converter ->Parse ());
153
- $ this ->WriteRDFBufferToWriteFile ();
142
+
143
+ $ rdf = $ converter ->Parse ();
144
+ parent ::addRDF ($ rdf );
145
+ parent ::writeRDFBufferToWriteFile ();
146
+ parent ::getWriteFile ()->Close ();
154
147
155
- echo PHP_EOL ;
156
- }
157
- $ this ->GetWriteFile ()->Close ();
158
-
159
- // generate the release file
160
- $ this ->DeleteBio2RDFReleaseFiles ($ odir );
161
- $ desc = $ this ->GetBio2RDFDatasetDescription (
162
- $ this ->GetNamespace (),
163
- "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php " ,
164
- $ bio2rdf_download_files ,
165
- "http://www.ebi.ac.uk/biomodels-main/ " ,
166
- array ("use-share-modify " ),
167
- null , // license
168
- $ this ->GetParameterValue ('download_url ' ),
169
- $ this ->version
170
- );
171
- $ this ->SetWriteFile ($ odir .$ this ->GetBio2RDFReleaseFile ($ this ->GetNamespace ()));
172
- $ this ->GetWriteFile ()->Write ($ desc );
173
- $ this ->GetWriteFile ()->Close ();
148
+ echo "done! " .PHP_EOL ;
149
+
150
+ //generate dataset description
151
+ echo "Generating dataset description for BioModel # $ id... " ;
152
+ $ source_file = (new DataResource ($ this ))
153
+ ->setURI ($ url )
154
+ ->setTitle ("EBI BioModels Database - BioModel # $ id " )
155
+ ->setRetrievedDate ( date ("Y-m-d\TG:i:s\Z " , filemtime ($ download_file )))
156
+ ->setFormat ("rdf/xml " )
157
+ ->setPublisher ("http://www.ebi.ac.uk/ " )
158
+ ->setHomepage ("http://www.ebi.ac.uk/biomodels-main/ " )
159
+ ->setRights ("use-share-modify " )
160
+ ->setLicense ("http://www.ebi.ac.uk/biomodels-main/termsofuse " )
161
+ ->setDataset ("http://identifiers.org/biomodels.db/ " );
162
+
163
+ $ dataset_description .= $ source_file ->toRDF ();
164
+ echo "done! " .PHP_EOL ;
165
+
166
+ }//foreach
167
+
168
+ echo "Generating dataset description for Bio2RDF BioModels... " ;
169
+
170
+ $ prefix = parent ::getPrefix ();
171
+ $ bVersion = parent ::getParameterValue ('bio2rdf_release ' );
172
+ $ date = date ("Y-m-d\TG:i:s\Z " );
173
+ $ output_file = (new DataResource ($ this ))
174
+ ->setURI ("http://download.bio2rdf.org/release/ $ bVersion/ $ prefix/ " )
175
+ ->setTitle ("Bio2RDF v $ bVersion RDF version of $ prefix (generated at $ date) " )
176
+ ->setSource ($ source_file ->getURI ())
177
+ ->setCreator ("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/biomodels/biomodels.php " )
178
+ ->setCreateDate ($ date )
179
+ ->setHomepage ("http://download.bio2rdf.org/release/ $ bVersion/ $ prefix/ $ prefix.html " )
180
+ ->setPublisher ("http://bio2rdf.org " )
181
+ ->setRights ("use-share-modify " )
182
+ ->setRights ("by-attribution " )
183
+ ->setRights ("restricted-by-source-license " )
184
+ ->setLicense ("http://creativecommons.org/licenses/by/3.0/ " )
185
+ ->setDataset (parent ::getDatasetURI ());
186
+
187
+ if ($ gz ) $ output_file ->setFormat ("application/gzip " );
188
+ if (strstr (parent ::getParameterValue ('output_format ' ),"nt " )) $ output_file ->setFormat ("application/n-triples " );
189
+ else $ output_file ->setFormat ("application/n-quads " );
174
190
191
+ $ dataset_description .= $ output_file ->toRDF ();
192
+
193
+ //write dataset description to file
194
+ parent ::setGraphURI ($ graph_uri );
195
+ parent ::setWriteFile ($ odir .parent ::getBio2RDFReleaseFile ());
196
+ parent ::getWriteFile ()->write ($ dataset_description );
197
+ parent ::getWriteFile ()->close ();
198
+ echo "done! " .PHP_EOL ;
199
+
175
200
return true ;
176
201
}
177
-
178
202
}
179
- $ start = microtime (true );
180
-
181
- set_error_handler ('error_handler ' );
182
- $ parser = new BiomodelsParser ($ argv );
183
- $ parser ->Run ();
184
203
185
- $ end = microtime (true );
186
- $ time_taken = $ end - $ start ;
187
- print "Started: " .date ("l jS F \@ g:i:s a " , $ start )."\n" ;
188
- print "Finished: " .date ("l jS F \@ g:i:s a " , $ end )."\n" ;
189
- print "Took: " .$ time_taken ." seconds \n"
190
204
?>
0 commit comments