Skip to content

Commit d6883df

Browse files
Finished updating pathwaycommons to release 3
1 parent edbeb6c commit d6883df

File tree

1 file changed

+0
-205
lines changed

1 file changed

+0
-205
lines changed

Diff for: pathwaycommons/pathwaycommons.php

-205
Original file line numberDiff line numberDiff line change
@@ -178,212 +178,7 @@ function Run()
178178
parent::getWriteFile()->close();
179179
echo "done!".PHP_EOL;
180180
}
181-
182-
183181

184-
/**
 * Convert a BioPAX level 2 RDF document into Bio2RDF quads.
 *
 * The document is parsed with ARC2, its triples are grouped by subject and
 * predicate, and every subject is then re-emitted through AddRDF() with
 * BioPAX / cPath URIs rewritten into the bio2rdf.org namespace.
 * Xref resources (those carrying both a DB and an ID property) are turned
 * into a label plus an owl:sameAs or biopaxl2:relationshipXref link; all
 * other resources have their statements copied one by one.
 *
 * Changes from the previous revision:
 *  - removed a debugging probe that queried a hard-coded SPARQL endpoint and
 *    called exit() on the first binding, which stopped the script before any
 *    parsing took place;
 *  - the progress interval can no longer be zero (modulo by zero for inputs
 *    with fewer than four subjects);
 *  - the index is initialised, so an empty document is handled;
 *  - subjects, predicates and objects now share one URI rewrite table
 *    (predicates/objects used "biopaxv2:" while subjects used "biopaxl2:",
 *    so object links never matched the rewritten subject URIs);
 *  - a missing rdf:type or an empty datatype no longer triggers notices or a
 *    namespace lookup on an empty string.
 *
 * @param string $data RDF content handed to the ARC2 parser
 * @return void
 */
function Parse($data)
{
	$biopax   = 'http://www.biopax.org/release/biopax-level2.owl#';
	$cpath    = 'http://cbio.mskcc.org/cpath#';
	$rdf_type = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';

	// single rewrite table used for subjects, predicates and objects
	$from = array($biopax, $cpath);
	$to   = array("http://bio2rdf.org/biopaxl2:", "http://bio2rdf.org/cpath:");

	echo 'parsing...';
	$parser = ARC2::getRDFParser();
	$parser->parse('http://pathwaycommons.org', $data);

	echo 'building index...';
	// index[subject][predicate] = list of objects
	$index = array();
	foreach($parser->getTriples() AS $a) {
		$index[$a['s']][$a['p']][] = array(
			'value'    => $a['o'],
			'type'     => $a['o_type'],
			'datatype' => isset($a['o_datatype']) ? $a['o_datatype'] : null,
		);
	}

	$nso = $this->GetNS();
	echo 'processing...';
	$total = count($index);
	// report progress / flush roughly every quarter, but at least every subject
	$interval = max(1, (int) (.25 * $total));
	$z = 0;
	foreach($index AS $s => $p_list) {
		if($z++ % $interval == 0) {
			echo "$z of $total".PHP_EOL;
			$this->WriteRDFBufferToWriteFile();
		}
		$s_uri = str_replace($from, $to, $s);

		// make the original uri the same as the bio2rdf uri
		$this->AddRDF($this->Quad($s_uri, $nso->GetFQURI("owl:sameAs"), $s));

		// handle the unification/relationship xrefs here
		if(isset($p_list[$biopax.'DB']) && isset($p_list[$biopax.'ID'])) {
			$db = $p_list[$biopax.'DB'][0]['value'];
			$id = $p_list[$biopax.'ID'][0]['value'];
			if(!$db || !$id) continue;

			// sometimes the id already carries a prefix such as go:XXXXXX;
			// reset the by-reference outputs so a previous iteration cannot leak in
			$ns2 = null;
			$id2 = null;
			$this->GetNS()->ParsePrefixedName($id, $ns2, $id2);
			if($ns2) $id = $id2;

			$qname = $this->MapDB($db).":".$id;
			$o_uri = $this->GetNS()->getFQURI($qname);
			$this->AddRDF($this->QuadL($s_uri, $nso->GetFQURI("rdfs:label"), $qname));

			// the xref flavour decides which link is emitted; untyped xrefs only get the label
			$type = isset($p_list[$rdf_type]) ? $p_list[$rdf_type][0]['value'] : null;
			if($type === $biopax.'unificationXref') {
				$this->AddRDF($this->Quad($s_uri, $nso->GetFQURI("owl:sameAs"), $o_uri));
			} elseif($type === $biopax.'relationshipXref') {
				$this->AddRDF($this->Quad($s_uri, $nso->GetFQURI("biopaxl2:relationshipXref"), $o_uri));
			}
			continue;
		}

		// now process each relation
		foreach($p_list AS $p => $o_list) {
			$p_uri = str_replace($from, $to, $p);

			// now process each object of the relation
			foreach($o_list AS $o) {
				if($o['type'] == 'uri') {
					$o_uri = str_replace($from, $to, $o['value']);
					$this->AddRDF($this->Quad($s_uri, $p_uri, $o_uri));
					continue;
				}

				// literal
				$literal  = $this->SafeLiteral($o['value']);
				$datatype = null;
				if(!empty($o['datatype'])) {
					// full URIs are used as-is, anything else is treated as a prefixed name
					$datatype = strstr($o['datatype'], "http://")
						? $o['datatype']
						: $nso->GetFQURI($o['datatype']);
				}
				$this->AddRDF($this->QuadL($s_uri, $p_uri, $literal, null, $datatype));
			}
		}
	}
	// NOTE(review): quads added after the last interval flush stay in the buffer;
	// presumably the caller writes them out - confirm.

	echo 'done!'.PHP_EOL;
} // end parse
298-
299-
/**
 * Translate a database name found in a BioPAX xref into the namespace
 * prefix used when building the qualified name of the referenced record.
 *
 * Names listed in the table are matched exactly (case-sensitive); any other
 * name, including CABRI, CPATH, IOB and WIKIPEDIA, is simply lower-cased.
 *
 * @param string $db database name as it appears in the source data
 * @return string namespace prefix
 */
function MapDB($db)
{
	static $prefixes = array(
		"ARACYC"                  => "aracyc",
		"BRENDA"                  => "brenda",
		"CAS"                     => "cas",
		"CHEMICALABSTRACTS"       => "cas",
		"ChEBI"                   => "chebi",
		"CYGD"                    => 'cygd',
		"DDBJ/EMBL/GENBANK"       => "genbank",
		"ECOCYC"                  => 'ecocyc',
		"EMBL"                    => 'embl',
		"ENSEMBL"                 => "ensembl",
		"ENSEMBLGENOMES"          => "ensembl",
		"ENTREZ"                  => "geneid",
		"ENTREZ_GENE"             => "geneid",
		"ENTREZGENE/LOCUSLINK"    => "geneid",
		"ENZYMECONSORTIUM"        => "ec",
		"EVIDENCE CODES ONTOLOGY" => "eco",
		"GENBANK"                 => 'genbank',
		"GENBANK_NUCL_GI"         => "gi",
		"GENBANK_PROTEIN_GI"      => "gi",
		"GENE_ONTOLOGY"           => "go",
		"GENE_SYMBOL"             => "symbol",
		"GRID"                    => 'biogrid',
		"HPRD"                    => 'hprd',
		"HUMANCYC"                => 'humancyc',
		"INTACT"                  => 'intact',
		"COMPOUND"                => "kegg",
		"KEGG-LEGACY"             => "kegg",
		"KEGG"                    => "kegg",
		"IPI"                     => 'ipi',
		"INTERPRO"                => 'interpro',
		"KNAPSACK"                => "knapsack",
		"METACYC"                 => "metacyc",
		"MINT"                    => "mint",
		"NCBI TAXONOMY"           => "taxon",
		"NCBI_TAXONOMY"           => "taxon",
		"NCI"                     => "pid",
		"NEWT"                    => "newt",
		'PDB'                     => 'pdb',
		'PDBE'                    => 'pdb',
		'PRIDE'                   => 'pride',
		'PSI-MI'                  => 'psi-mi',
		'PSI-MOD'                 => 'psi-mod',
		'PUBCHEM'                 => 'pubchemcompound',
		'RCSB PDB'                => 'pdb',
		'REACTOME'                => 'reactome',
		'REACTOME DATABASE ID'    => 'reactome',
		'REF_SEQ'                 => 'refseq',
		'RESID'                   => 'resid',
		'SGD'                     => 'sgd',
		'TAXON'                   => 'taxon',
		'TAXONOMY'                => 'taxon',
		'UMBBD-COMPOUNDS'         => 'umbbd',
		'UNIPARC'                 => 'uniparc',
		'UNIPROT'                 => 'uniprot',
		'WORMBASE'                => 'wormbase',
		'WWPDB'                   => 'pdb',
	);

	if (isset($prefixes[$db])) {
		return $prefixes[$db];
	}

	// no dedicated prefix (e.g. CABRI, CPATH, IOB, WIKIPEDIA): use the lower-cased name
	return strtolower($db);
}
376-
}
377-
// Script entry point: run the Pathway Commons parser and report wall-clock timing.
$start = microtime(true);

set_error_handler('error_handler');
$parser = new PathwaycommonsParser($argv);
$parser->Run();

$end = microtime(true);
$time_taken = $end - $start;
// date() takes an integer timestamp; microtime(true) yields a float, so cast
// explicitly instead of relying on a lossy implicit conversion, which newer
// PHP versions report as a deprecation to the handler installed above.
print "Started: ".date("l jS F \@ g:i:s a", (int) $start)."\n";
print "Finished: ".date("l jS F \@ g:i:s a", (int) $end)."\n";
print "Took: ".$time_taken." seconds\n";
388183

389184
?>

0 commit comments

Comments
 (0)