@@ -45,6 +45,19 @@ function __construct($argv) {
45
45
46
46
/**
 * Entry point: dispatches to the download and/or process stages.
 *
 * Each stage runs only when its parameter ('download' / 'process'), as
 * returned by parent::getParameterValue(), is strictly identical (===) to
 * boolean true. The two checks are independent, so both stages may run in
 * one call, download first and then process.
 */
function Run (){
47
47
48
// Stage 1: download, only when the 'download' parameter is exactly true.
+ if (parent ::getParameterValue ('download ' ) === true )
49
+ {
50
+ $ this ->download ();
51
+ }
52
// Stage 2: process, only when the 'process' parameter is exactly true.
+ if (parent ::getParameterValue ('process ' ) === true )
53
+ {
54
+ $ this ->process ();
55
+ }
56
+
57
+ }
58
+
59
+ function download (){
60
+
48
61
if (parent ::getParameterValue ('files ' ) == 'all ' ) {
49
62
$ files = explode ("| " ,parent ::getParameterList ('files ' ));
50
63
array_shift ($ files );
@@ -53,9 +66,57 @@ function Run(){
53
66
}
54
67
55
68
$ ldir = parent ::getParameterValue ('indir ' );
56
- $ odir = parent ::getParameterValue ('outdir ' );
57
69
$ rdir = parent ::getParameterValue ('download_url ' );
58
70
71
+ //make sure directories end with slash
72
+ if (substr ($ ldir , -1 ) !== "/ " ){
73
+ $ ldir = $ ldir ."/ " ;
74
+ }
75
+
76
+ $ rfiles = array (
77
+ "dbxref " => "curation/chromosomal_feature/dbxref.tab " ,
78
+ "features " => "curation/chromosomal_feature/SGD_features.tab " ,
79
+ "domains " => "curation/calculated_protein_info/domains/domains.tab " ,
80
+ "protein " => "curation/calculated_protein_info/protein_properties.tab " ,
81
+ "goa " => "curation/literature/gene_association.sgd.gz " ,
82
+ "goslim " => "curation/literature/go_slim_mapping.tab " ,
83
+ "complex " => "curation/literature/go_protein_complex_slim.tab " ,
84
+ "interaction " => "curation/literature/interaction_data.tab " ,
85
+ "phenotype " => "curation/literature/phenotype_data.tab " ,
86
+ "pathways " => "curation/literature/biochemical_pathways.tab " ,
87
+ "mapping " => "mapping "
88
+ );
89
+
90
+ foreach ($ files as $ file ){
91
+
92
+ $ ext = substr (strrchr ($ rfiles [$ file ], '. ' ), 1 );
93
+ if ($ ext == "tab " ){
94
+ $ lfile = $ ldir ."sgd_ " .$ file .".tab " ;
95
+ } elseif ($ ext = "gz " ){
96
+ $ lfile = $ ldir ."sgd_ " .$ file .".tab.gz " ;
97
+ }
98
+
99
+ //download all files [except mapping file]
100
+ if ($ file !== "mapping " ) {
101
+ $ rfile = $ rdir .$ rfiles [$ file ];
102
+ echo "Downloading $ file ... " ;
103
+ Utils::DownloadSingle ($ rfile , $ lfile );
104
+ }
105
+ }
106
+ }
107
+
108
+ function process (){
109
+ if (parent ::getParameterValue ('files ' ) == 'all ' ) {
110
+ $ files = explode ("| " ,parent ::getParameterList ('files ' ));
111
+ array_shift ($ files );
112
+ } else {
113
+ $ files = explode (", " ,parent ::getParameterValue ('files ' ));
114
+ }
115
+
116
+ $ ldir = parent ::getParameterValue ('indir ' );
117
+ $ rdir = parent ::getParameterValue ('download_url ' );
118
+ $ odir = parent ::getParameterValue ('outdir ' );
119
+
59
120
//make sure directories end with slash
60
121
if (substr ($ ldir , -1 ) !== "/ " ){
61
122
$ ldir = $ ldir ."/ " ;
@@ -79,76 +140,105 @@ function Run(){
79
140
"mapping " => "mapping "
80
141
);
81
142
143
+ $ graph_uri = parent ::getGraphURI ();
144
+ if (parent ::getParameterValue ('dataset_graph ' ) == true ) parent ::setGraphURI (parent ::getDatasetURI ());
145
+
146
+ $ dataset_description = '' ;
147
+
82
148
foreach ($ files as $ file ){
83
149
84
150
$ ext = substr (strrchr ($ rfiles [$ file ], '. ' ), 1 );
85
151
if ($ ext == "tab " ){
86
- $ lfile = $ ldir . "sgd_ " .$ file .".tab " ;
152
+ $ lfile = "sgd_ " .$ file .".tab " ;
87
153
} elseif ($ ext = "gz " ){
88
- $ lfile = $ ldir . "sgd_ " .$ file .".tab.gz " ;
154
+ $ lfile = "sgd_ " .$ file .".tab.gz " ;
89
155
}
90
156
157
+ $ rfile = $ rdir .$ rfiles [$ file ];
158
+
91
159
if (!file_exists ($ lfile ) && parent ::getParameterValue ('download ' ) == false ) {
92
160
trigger_error ($ lfile ." not found. Will attempt to download. " , E_USER_NOTICE );
93
- parent ::setParameterValue ('download ' ,true );
161
+
162
+ Utils::DownloadSingle ($ rfile , $ ldir .$ lfile );
94
163
}
95
164
96
- //download all files [except mapping file]
97
- if (parent ::getParameterValue ('download ' ) == true && $ file !== "mapping " ) {
98
- $ rfile = $ rdir .$ rfiles [$ file ];
99
- echo "downloading $ file ... " ;
100
- Utils::DownloadSingle ($ rfile , $ lfile );
101
- }
102
-
103
- $ ofile = $ odir ."sgd_ " .$ file .'.nt ' ;
165
+ $ ofile = "sgd_ " .$ file .'.nt ' ;
166
+
104
167
$ gz =false ;
105
- if ($ this ->GetParameterValue ('graph_uri ' )) {$ ofile = $ odir ."sgd_ " .$ file .'.nq ' ; }
168
+
169
+ if ($ this ->GetParameterValue ('graph_uri ' )) {$ ofile = "sgd_ " .$ file .'.nq ' ; }
106
170
107
171
if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) {
108
172
$ ofile .= '.gz ' ;
109
173
$ gz = true ;
110
174
}
111
175
112
- parent ::setWriteFile ($ ofile , $ gz );
176
+ parent ::setWriteFile ($ odir . $ ofile , $ gz );
113
177
114
178
//parse file
115
- if ($ ext !== "gz " ){
116
- parent ::setReadFile ($ lfile , FALSE );
117
- } else {
118
- parent ::setReadFile ($ lfile , TRUE );
119
- }
179
+ parent ::setReadFile ($ ldir .$ lfile , $ gz );
120
180
121
181
$ fnx = $ file ;
122
- echo "processing $ file... " ;
182
+ echo "Processing $ file... " ;
123
183
$ this ->$ fnx ();
124
- echo "done! " ;
184
+ echo PHP_EOL . "done! " ;
125
185
126
186
//write RDF to file
127
187
parent ::writeRDFBufferToWriteFile ();
128
188
129
189
//close write file
130
190
parent ::getWriteFile ()->close ();
131
191
echo PHP_EOL ;
192
+
193
+ // generate the dataset release file
194
+ echo "Generating dataset description... " .PHP_EOL ;
195
+ // dataset description
196
+ $ source_file = (new DataResource ($ this ))
197
+ ->setURI ($ rfile )
198
+ ->setTitle ("Saccharomyces Genome Database ( $ file) " )
199
+ ->setRetrievedDate ( date ("Y-m-d\TG:i:s\Z " , filemtime ($ lfile )))
200
+ ->setFormat ("text/tab-separated-value " )
201
+ ->setFormat ("application/gzip " )
202
+ ->setPublisher ("http://www.yeastgenome.org/ " )
203
+ ->setHomepage ("http://www.yeastgenome.org/ " )
204
+ ->setRights ("use " )
205
+ ->setLicense ("http://www.stanford.edu/site/terms.html " )
206
+ ->setDataset ("http://identifiers.org/sgd/ " );
207
+
208
+ $ prefix = parent ::getPrefix ();
209
+ $ bVersion = parent ::getParameterValue ('bio2rdf_release ' );
210
+ $ date = date ("Y-m-d\TG:i:s\Z " );
211
+ $ output_file = (new DataResource ($ this ))
212
+ ->setURI ("http://download.bio2df.org/release/ $ bVersion/ $ prefix/ $ ofile " )
213
+ ->setTitle ("Bio2RDF v $ bVersion RDF version of $ prefix (generated at $ date) " )
214
+ ->setSource ($ source_file ->getURI ())
215
+ ->setCreator ("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php " )
216
+ ->setCreateDate ($ date )
217
+ ->setHomepage ("http://download.bio2rdf.org/release/ $ bVersion/ $ prefix/ $ prefix.html " )
218
+ ->setPublisher ("http://bio2rdf.org " )
219
+ ->setRights ("use-share-modify " )
220
+ ->setRights ("by-attribution " )
221
+ ->setRights ("restricted-by-source-license " )
222
+ ->setLicense ("http://creativecommons.org/licenses/by/3.0/ " )
223
+ ->setDataset (parent ::getDatasetURI ());
224
+
225
+ if ($ gz ) $ output_file ->setFormat ("application/gzip " );
226
+ if (strstr (parent ::getParameterValue ('output_format ' ),"nt " )) $ output_file ->setFormat ("application/n-triples " );
227
+ else $ output_file ->setFormat ("application/n-quads " );
228
+
229
+ $ dataset_description .= $ source_file ->toRDF ().$ output_file ->toRDF ();
132
230
133
231
}//foreach
134
232
135
- // generate the dataset release file
136
- echo "generating dataset release file... " ;
137
- $ desc = parent ::getBio2RDFDatasetDescription (
138
- $ this ->getPrefix (),
139
- "https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php " ,
140
- $ this ->getBio2RDFDownloadURL ($ this ->getNamespace ()),
141
- "http://yeastgenome.org " ,
142
- array ("use " ),
143
- "http://yeastgenome.org " ,
144
- parent ::getParameterValue ('download_url ' ),
145
- parent ::getDatasetVersion ()
146
- );
147
- $ this ->setWriteFile ($ odir .$ this ->getBio2RDFReleaseFile ($ this ->getNamespace ()));
148
- $ this ->getWriteFile ()->write ($ desc );
149
- $ this ->getWriteFile ()->close ();
233
+ //set graph URI back to default
234
+ parent ::setGraphURI ($ graph_uri );
235
+
236
+ //write dataset description to file
237
+ parent ::setWriteFile ($ odir .parent ::getBio2RDFReleaseFile ());
238
+ parent ::getWriteFile ()->write ($ dataset_description );
239
+ parent ::getWriteFile ()->close ();
150
240
echo "done! " .PHP_EOL ;
151
-
241
+
152
242
}
153
243
154
244
function dbxref (){
@@ -732,7 +822,7 @@ function interaction(){
732
822
733
823
$ apoin = fopen ($ apofile , "r " );
734
824
if ($ apoin === FALSE ) {
735
- trigger_error ("Unable to open $ apofile " );
825
+ trigger_error ("Unable to open $ apofile " , E_USER_ERROR );
736
826
exit ;
737
827
}
738
828
$ terms = OBOParser ($ apoin );
@@ -816,7 +906,7 @@ function phenotype(){
816
906
817
907
$ apoin = fopen ($ apofile , "r " );
818
908
if ($ apoin === FALSE ) {
819
- trigger_error ("Unable to open $ apofile " );
909
+ trigger_error ("Unable to open $ apofile " , E_USER_ERROR );
820
910
exit ;
821
911
}
822
912
$ terms = OBOParser ($ apoin );
@@ -869,7 +959,7 @@ function phenotype(){
869
959
parent ::describeProperty ($ this ->getVoc ()."experiment-type " , "Relationship between an SGD experiment and the experiment type " )
870
960
);
871
961
} else {
872
- trigger_error ("No match for experiment type $ label " );
962
+ trigger_error ("No match for experiment type $ label " , E_USER_WARNING );
873
963
}
874
964
875
965
// mutant type [6]
@@ -1202,16 +1292,5 @@ function GetLatestNCBOOntology($ontology_id,$apikey,$target_filepath){
1202
1292
Utils::DownloadSingle ('http://rest.bioontology.org/bioportal/virtual/download/ ' .$ ontology_id .'?apikey= ' .$ apikey , $ target_filepath );
1203
1293
}
1204
1294
}//SGDParser
1205
- $ start = microtime (true );
1206
-
1207
- set_error_handler ('error_handler ' );
1208
- $ parser = new SGDParser ($ argv );
1209
- $ parser ->Run ();
1210
-
1211
- $ end = microtime (true );
1212
- $ time_taken = $ end - $ start ;
1213
- print "Started: " .date ("l jS F \@ g:i:s a " , $ start )."\n" ;
1214
- print "Finished: " .date ("l jS F \@ g:i:s a " , $ end )."\n" ;
1215
- print "Took: " .$ time_taken ." seconds \n"
1216
1295
1217
1296
?>
0 commit comments