@@ -45,6 +45,19 @@ function __construct($argv) {
45
45
46
46
/**
 * Entry point: runs the pipeline stages that were switched on via parameters.
 *
 * The stages are attempted in a fixed order, download first and process second.
 * A stage runs only when the parameter of the same name is strictly boolean
 * true (=== true); any other value leaves the stage out.
 */
function Run(){
	// Each stage's method has the same name as the parameter that enables it.
	foreach (array('download', 'process') as $stage) {
		// The flag is read right before its stage, so 'process' is looked up
		// only after download() (if enabled) has finished.
		if (parent::getParameterValue($stage) !== true) {
			continue;
		}
		$this->$stage();
	}
}
58
+
59
+ function download (){
60
+
48
61
if (parent ::getParameterValue ('files ' ) == 'all ' ) {
49
62
$ files = explode ("| " ,parent ::getParameterList ('files ' ));
50
63
array_shift ($ files );
@@ -53,17 +66,7 @@ function Run(){
53
66
}
54
67
55
68
$ ldir = parent ::getParameterValue ('indir ' );
56
- $ odir = parent ::getParameterValue ('outdir ' );
57
69
$ rdir = parent ::getParameterValue ('download_url ' );
58
-
59
- //make sure directories end with slash
60
- if (substr ($ ldir , -1 ) !== "/ " ){
61
- $ ldir = $ ldir ."/ " ;
62
- }
63
-
64
- if (substr ($ odir , -1 ) !== "/ " ){
65
- $ odir = $ odir ."/ " ;
66
- }
67
70
68
71
$ rfiles = array (
69
72
"dbxref " => "curation/chromosomal_feature/dbxref.tab " ,
@@ -87,68 +90,141 @@ function Run(){
87
90
} elseif ($ ext = "gz " ){
88
91
$ lfile = $ ldir ."sgd_ " .$ file .".tab.gz " ;
89
92
}
90
-
91
- if (!file_exists ($ lfile ) && parent ::getParameterValue ('download ' ) == false ) {
92
- trigger_error ($ lfile ." not found. Will attempt to download. " , E_USER_NOTICE );
93
- parent ::setParameterValue ('download ' ,true );
94
- }
95
93
96
94
//download all files [except mapping file]
97
- if (parent :: getParameterValue ( ' download ' ) == true && $ file !== "mapping " ) {
95
+ if ($ file !== "mapping " ) {
98
96
$ rfile = $ rdir .$ rfiles [$ file ];
99
- echo "downloading $ file ... " ;
97
+ echo "Downloading $ file ... " ;
100
98
Utils::DownloadSingle ($ rfile , $ lfile );
101
99
}
100
+ }
101
+ }
102
102
103
- $ ofile = $ odir ."sgd_ " .$ file .'.nt ' ;
103
/**
 * Converts each requested SGD source file to RDF and writes the dataset description.
 *
 * For every file key in the 'files' parameter this method:
 *  - derives the local input file name (sgd_<key>.tab or sgd_<key>.tab.gz) from
 *    the extension of the remote path in $rfiles,
 *  - downloads the file into 'indir' when it is missing there and the
 *    'download' parameter is false,
 *  - opens the output file in 'outdir' (.nt, or .nq when 'graph_uri' is set,
 *    plus .gz when 'output_format' contains "gz"),
 *  - calls the parser method named after the file key,
 *  - flushes the RDF buffer, closes the output file, and appends source and
 *    output DataResource descriptions to the dataset description.
 *
 * After the loop the graph URI is restored and the accumulated description is
 * written to the release file in 'outdir'.
 *
 * Fixes relative to the previous version:
 *  - the local file is checked and time-stamped at $ldir.$lfile, the same path
 *    it is downloaded to and read from, instead of the bare file name;
 *  - the read handle is opened according to the INPUT file's compression, not
 *    the output format's;
 *  - the accidental assignment in `elseif ($ext = "gz")` is gone;
 *  - application/gzip is only declared for sources that are gzip files;
 *  - the "bio2df.org" typo in the output URI is corrected;
 *  - unknown file keys are reported and skipped instead of reaching an
 *    undefined index and an undefined method call.
 */
function process(){
	// 'all' expands to the full parameter list; its first entry is dropped.
	if (parent::getParameterValue('files') == 'all') {
		$files = explode("|", parent::getParameterList('files'));
		array_shift($files);
	} else {
		$files = explode(",", parent::getParameterValue('files'));
	}

	$ldir = parent::getParameterValue('indir');
	$rdir = parent::getParameterValue('download_url');
	$odir = parent::getParameterValue('outdir');

	// file key => path of the source file relative to 'download_url'
	$rfiles = array(
		"dbxref"      => "curation/chromosomal_feature/dbxref.tab",
		"features"    => "curation/chromosomal_feature/SGD_features.tab",
		"domains"     => "curation/calculated_protein_info/domains/domains.tab",
		"protein"     => "curation/calculated_protein_info/protein_properties.tab",
		"goa"         => "curation/literature/gene_association.sgd.gz",
		"goslim"      => "curation/literature/go_slim_mapping.tab",
		"complex"     => "curation/literature/go_protein_complex_slim.tab",
		"interaction" => "curation/literature/interaction_data.tab",
		"phenotype"   => "curation/literature/phenotype_data.tab",
		"pathways"    => "curation/literature/biochemical_pathways.tab",
		"mapping"     => "mapping"
	);

	// Remember the current graph URI so it can be restored after the loop.
	$graph_uri = parent::getGraphURI();
	if (parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());

	$dataset_description = '';

	foreach ($files as $file) {

		// Guard against keys that have no source entry (and hence no parser).
		if (!isset($rfiles[$file])) {
			trigger_error("Unknown file '$file'; skipping.", E_USER_WARNING);
			continue;
		}

		// Anything that is not a plain .tab file is treated as gzip. This keeps
		// the effective behaviour of the former `elseif ($ext = "gz")` branch
		// (which always matched) without the accidental assignment.
		// NOTE(review): "mapping" has no extension and therefore also lands
		// here - confirm that is intended.
		$ext    = substr(strrchr($rfiles[$file], '.'), 1);
		$in_gz  = ($ext !== "tab");
		$lfile  = "sgd_".$file.($in_gz ? ".tab.gz" : ".tab");
		$lpath  = $ldir.$lfile;
		$rfile  = $rdir.$rfiles[$file];

		// Fetch the file on demand when the download stage was not requested.
		if (!file_exists($lpath) && parent::getParameterValue('download') == false) {
			trigger_error($lpath." not found. Will attempt to download.", E_USER_NOTICE);
			Utils::DownloadSingle($rfile, $lpath);
		}

		// Output name: n-triples by default, n-quads when a graph URI is set.
		$ofile = "sgd_".$file.'.nt';
		if ($this->GetParameterValue('graph_uri')) { $ofile = "sgd_".$file.'.nq'; }

		// $gz describes the OUTPUT compression only.
		$gz = false;
		if (strstr(parent::getParameterValue('output_format'), "gz")) {
			$ofile .= '.gz';
			$gz = true;
		}

		parent::setWriteFile($odir.$ofile, $gz);

		// The read handle follows the INPUT file's compression.
		parent::setReadFile($lpath, $in_gz);

		// Dispatch to the parser method that shares the file key's name.
		$fnx = $file;
		echo "Processing $file... ";
		$this->$fnx();
		echo PHP_EOL."done!";

		// write RDF to file
		parent::writeRDFBufferToWriteFile();

		// close write file
		parent::getWriteFile()->close();
		echo PHP_EOL;

		// generate the dataset description entries for this file
		echo "Generating dataset description... ".PHP_EOL;

		// NOTE(review): "G" yields an hour without leading zero and date() uses
		// the local time zone although a literal "Z" is appended - confirm
		// whether "H" / gmdate() is wanted before changing the emitted values.
		$source_file = (new DataResource($this))
			->setURI($rfile)
			->setTitle("Saccharomyces Genome Database ($file)")
			->setRetrievedDate(date("Y-m-d\TG:i:s\Z", filemtime($lpath)))
			->setFormat("text/tab-separated-value");
		// Only gzip sources are declared as gzip.
		if ($in_gz) $source_file->setFormat("application/gzip");
		$source_file
			->setPublisher("http://www.yeastgenome.org/")
			->setHomepage("http://www.yeastgenome.org/")
			->setRights("use")
			->setLicense("http://www.stanford.edu/site/terms.html")
			->setDataset("http://identifiers.org/sgd/");

		$prefix   = parent::getPrefix();
		$bVersion = parent::getParameterValue('bio2rdf_release');
		$date     = date("Y-m-d\TG:i:s\Z");
		$output_file = (new DataResource($this))
			->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/$ofile")
			->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
			->setSource($source_file->getURI())
			->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
			->setCreateDate($date)
			->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
			->setPublisher("http://bio2rdf.org")
			->setRights("use-share-modify")
			->setRights("by-attribution")
			->setRights("restricted-by-source-license")
			->setLicense("http://creativecommons.org/licenses/by/3.0/")
			->setDataset(parent::getDatasetURI());

		if ($gz) $output_file->setFormat("application/gzip");
		if (strstr(parent::getParameterValue('output_format'), "nt")) $output_file->setFormat("application/n-triples");
		else $output_file->setFormat("application/n-quads");

		$dataset_description .= $source_file->toRDF().$output_file->toRDF();

	}//foreach

	//set graph URI back to default
	parent::setGraphURI($graph_uri);

	//write dataset description to file
	parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
	parent::getWriteFile()->write($dataset_description);
	parent::getWriteFile()->close();
	echo "done!".PHP_EOL;

}
153
229
154
230
function dbxref (){
@@ -732,7 +808,7 @@ function interaction(){
732
808
733
809
$ apoin = fopen ($ apofile , "r " );
734
810
if ($ apoin === FALSE ) {
735
- trigger_error ("Unable to open $ apofile " );
811
+ trigger_error ("Unable to open $ apofile " , E_USER_ERROR );
736
812
exit ;
737
813
}
738
814
$ terms = OBOParser ($ apoin );
@@ -816,7 +892,7 @@ function phenotype(){
816
892
817
893
$ apoin = fopen ($ apofile , "r " );
818
894
if ($ apoin === FALSE ) {
819
- trigger_error ("Unable to open $ apofile " );
895
+ trigger_error ("Unable to open $ apofile " , E_USER_ERROR );
820
896
exit ;
821
897
}
822
898
$ terms = OBOParser ($ apoin );
@@ -869,7 +945,7 @@ function phenotype(){
869
945
parent ::describeProperty ($ this ->getVoc ()."experiment-type " , "Relationship between an SGD experiment and the experiment type " )
870
946
);
871
947
} else {
872
- trigger_error ("No match for experiment type $ label " );
948
+ trigger_error ("No match for experiment type $ label " , E_USER_WARNING );
873
949
}
874
950
875
951
// mutant type [6]
@@ -1202,16 +1278,5 @@ function GetLatestNCBOOntology($ontology_id,$apikey,$target_filepath){
1202
1278
Utils::DownloadSingle ('http://rest.bioontology.org/bioportal/virtual/download/ ' .$ ontology_id .'?apikey= ' .$ apikey , $ target_filepath );
1203
1279
}
1204
1280
}//SGDParser
1205
- $ start = microtime (true );
1206
-
1207
- set_error_handler ('error_handler ' );
1208
- $ parser = new SGDParser ($ argv );
1209
- $ parser ->Run ();
1210
-
1211
- $ end = microtime (true );
1212
- $ time_taken = $ end - $ start ;
1213
- print "Started: " .date ("l jS F \@ g:i:s a " , $ start )."\n" ;
1214
- print "Finished: " .date ("l jS F \@ g:i:s a " , $ end )."\n" ;
1215
- print "Took: " .$ time_taken ." seconds \n"
1216
1281
1217
1282
?>
0 commit comments