@@ -178,212 +178,7 @@ function Run()
178
178
parent ::getWriteFile ()->close ();
179
179
echo "done! " .PHP_EOL ;
180
180
}
181
-
182
-
183
181
184
/**
 * Convert an RDF document into Bio2RDF quads.
 *
 * The document is parsed with ARC2, its triples are grouped by subject and
 * predicate, and every subject is re-emitted through AddRDF():
 *   - the BioPAX level 2 and cPath namespaces are rewritten to bio2rdf.org URIs;
 *   - each rewritten subject is linked to its original URI with owl:sameAs;
 *   - subjects carrying both a DB and an ID property are treated as xrefs and
 *     turned into a label plus an owl:sameAs / relationshipXref link;
 *   - all other statements are copied over as URI or literal quads.
 *
 * A debugging SPARQL query that used to sit at the top of this method (it
 * printed the first binding and called exit) has been removed, because it
 * stopped the script before any parsing took place.
 *
 * @param string $data RDF document content handed to the ARC2 parser.
 * @return void
 */
function Parse($data)
{
    $biopax   = 'http://www.biopax.org/release/biopax-level2.owl#';
    $cpath    = 'http://cbio.mskcc.org/cpath#';
    $rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

    // Namespace rewrites. NOTE(review): subjects are mapped to "biopaxl2:"
    // while predicates/objects are mapped to "biopaxv2:" - this mismatch is
    // preserved from the original code; confirm which prefix is intended.
    $source_ns     = array($biopax, $cpath);
    $subject_ns    = array("http://bio2rdf.org/biopaxl2:", "http://bio2rdf.org/cpath:");
    $resource_ns   = array("http://bio2rdf.org/biopaxv2:", "http://bio2rdf.org/cpath:");

    echo 'parsing... ';
    $parser = ARC2::getRDFParser();
    $parser->parse('http://pathwaycommons.org', $data);

    echo 'building index... ';
    // subject URI => predicate URI => list of objects
    $index = array();
    foreach ($parser->getTriples() as $a) {
        $index[$a['s']][$a['p']][] = array(
            'value'    => $a['o'],
            'type'     => $a['o_type'],
            'datatype' => $a['o_datatype'],
        );
    }

    $nso = $this->GetNS();
    echo 'processing... ';
    $total = count($index);
    // Report/flush roughly every quarter; never let the interval drop to 0,
    // which would make the modulo below divide by zero for tiny inputs.
    $interval = max(1, (int) (.25 * $total));
    $z = 0;
    foreach ($index as $s => $p_list) {
        if ($z++ % $interval == 0) {
            echo "$z of $total".PHP_EOL;
            $this->WriteRDFBufferToWriteFile();
        }
        $s_uri = str_replace($source_ns, $subject_ns, $s);

        // make the original uri the same as the bio2rdf uri
        $this->AddRDF($this->Quad($s_uri, $nso->GetFQURI("owl:sameAs"), $s));

        // handle the unification/relationship xrefs here
        if (isset($p_list[$biopax.'DB']) && isset($p_list[$biopax.'ID'])) {
            $db = $p_list[$biopax.'DB'][0]['value'];
            $id = $p_list[$biopax.'ID'][0]['value'];

            if (!$db || !$id) continue;

            // Some ids already carry a prefix (e.g. go:XXXXXX); strip it.
            // $ns2/$id2 are presumably filled in by reference - reset them so
            // a value from a previous subject can never leak into this one.
            $ns2 = null;
            $id2 = null;
            $nso->ParsePrefixedName($id, $ns2, $id2);
            if ($ns2) $id = $id2;

            $qname = $this->MapDB($db).":".$id;
            $o_uri = $nso->GetFQURI($qname);
            $this->AddRDF($this->QuadL($s_uri, $nso->GetFQURI("rdfs:label"), $qname));

            // An xref without an rdf:type gets the label only.
            $type = isset($p_list[$rdf_type][0]['value']) ? $p_list[$rdf_type][0]['value'] : null;
            if ($type == $biopax.'unificationXref') {
                $this->AddRDF($this->Quad($s_uri, $nso->GetFQURI("owl:sameAs"), $o_uri));
            } elseif ($type == $biopax.'relationshipXref') {
                $this->AddRDF($this->Quad($s_uri, $nso->GetFQURI("biopaxl2:relationshipXref"), $o_uri));
            }
            continue;
        }

        // now process each relation
        foreach ($p_list as $p => $o_list) {
            $p_uri = str_replace($source_ns, $resource_ns, $p);

            // now process each object of the relation
            foreach ($o_list as $o) {
                if ($o['type'] == 'uri') {
                    $o_uri = str_replace($source_ns, $resource_ns, $o['value']);
                    $this->AddRDF($this->Quad($s_uri, $p_uri, $o_uri));
                    continue;
                }

                // literal
                $literal  = $this->SafeLiteral($o['value']);
                $datatype = null;
                // !empty() rather than isset(): an empty datatype string
                // means "no datatype" and must not be expanded.
                if (!empty($o['datatype'])) {
                    if (strpos($o['datatype'], "http://") !== false) {
                        $datatype = $o['datatype'];
                    } else {
                        $datatype = $nso->GetFQURI($o['datatype']);
                    }
                }
                $this->AddRDF($this->QuadL($s_uri, $p_uri, $literal, null, $datatype));
            }
        }
    }

    // NOTE(review): quads buffered after the last progress flush are not
    // written here - confirm that the caller flushes the buffer afterwards.
    echo 'done!'.PHP_EOL;
} // end parse
298
-
299
/**
 * Map an xref database name onto the namespace prefix used for its qname.
 *
 * The lookup ignores letter case and surrounding whitespace, so "ChEBI",
 * "CHEBI" and " chebi " all resolve to the same prefix. Names without an
 * entry (for example CABRI, CPATH, IOB, WIKIPEDIA) fall back to the
 * lower-cased, trimmed name.
 *
 * @param string $db Database name as found in the xref DB property.
 * @return string Lower-case namespace prefix.
 */
function MapDB($db)
{
    // Keys are upper-case; the input is normalised the same way before lookup.
    static $prefix_of = array(
        'ARACYC'                  => 'aracyc',
        'BRENDA'                  => 'brenda',
        'CAS'                     => 'cas',
        'CHEMICALABSTRACTS'       => 'cas',
        'CHEBI'                   => 'chebi',
        'CYGD'                    => 'cygd',
        'DDBJ/EMBL/GENBANK'       => 'genbank',
        'ECOCYC'                  => 'ecocyc',
        'EMBL'                    => 'embl',
        'ENSEMBL'                 => 'ensembl',
        'ENSEMBLGENOMES'          => 'ensembl',
        'ENTREZ'                  => 'geneid',
        'ENTREZ_GENE'             => 'geneid',
        'ENTREZGENE/LOCUSLINK'    => 'geneid',
        'ENZYMECONSORTIUM'        => 'ec',
        'EVIDENCE CODES ONTOLOGY' => 'eco',
        'GENBANK'                 => 'genbank',
        'GENBANK_NUCL_GI'         => 'gi',
        'GENBANK_PROTEIN_GI'      => 'gi',
        'GENE_ONTOLOGY'           => 'go',
        'GENE_SYMBOL'             => 'symbol',
        'GRID'                    => 'biogrid',
        'HPRD'                    => 'hprd',
        'HUMANCYC'                => 'humancyc',
        'INTACT'                  => 'intact',
        'COMPOUND'                => 'kegg',
        'KEGG-LEGACY'             => 'kegg',
        'KEGG'                    => 'kegg',
        'IPI'                     => 'ipi',
        'INTERPRO'                => 'interpro',
        'KNAPSACK'                => 'knapsack',
        'METACYC'                 => 'metacyc',
        'MINT'                    => 'mint',
        'NCBI TAXONOMY'           => 'taxon',
        'NCBI_TAXONOMY'           => 'taxon',
        'NCI'                     => 'pid',
        'NEWT'                    => 'newt',
        'PDB'                     => 'pdb',
        'PDBE'                    => 'pdb',
        'PRIDE'                   => 'pride',
        'PSI-MI'                  => 'psi-mi',
        'PSI-MOD'                 => 'psi-mod',
        'PUBCHEM'                 => 'pubchemcompound',
        'RCSB PDB'                => 'pdb',
        'REACTOME'                => 'reactome',
        'REACTOME DATABASE ID'    => 'reactome',
        'REF_SEQ'                 => 'refseq',
        'RESID'                   => 'resid',
        'SGD'                     => 'sgd',
        'TAXON'                   => 'taxon',
        'TAXONOMY'                => 'taxon',
        'UMBBD-COMPOUNDS'         => 'umbbd',
        'UNIPARC'                 => 'uniparc',
        'UNIPROT'                 => 'uniprot',
        'WORMBASE'                => 'wormbase',
        'WWPDB'                   => 'pdb',
    );

    $name = trim($db);
    $key  = strtoupper($name);
    if (isset($prefix_of[$key])) {
        return $prefix_of[$key];
    }

    // Unknown database: use its own name as the prefix.
    return strtolower($name);
}
376
- }
377
// Driver: run the Pathway Commons parser once and report wall-clock timing.
$start = microtime(true);

// 'error_handler' is a callback name; the function itself is not defined in
// this part of the file.
set_error_handler('error_handler');
$parser = new PathwaycommonsParser($argv);
$parser->Run();

$end = microtime(true);
$time_taken = $end - $start;

// date() expects an integer timestamp; microtime(true) yields a float, so
// cast explicitly instead of relying on the implicit (deprecated) conversion.
print "Started: ".date("l jS F \@ g:i:s a", (int) $start)."\n";
print "Finished: ".date("l jS F \@ g:i:s a", (int) $end)."\n";
print "Took: ".$time_taken." seconds\n";
388
183
389
184
?>
0 commit comments