Skip to content

Commit 7dfc33b

Browse files
Started updating pathwaycommons parser to release3
1 parent 59c93b2 commit 7dfc33b

File tree

1 file changed

+112
-50
lines changed

1 file changed

+112
-50
lines changed

Diff for: pathwaycommons/pathwaycommons.php

+112-50
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<?php
22
/**
3-
Copyright (C) 2012 Michel Dumontier
3+
Copyright (C) 2012 Michel Dumontier, Alison Callahan
44
55
Permission is hereby granted, free of charge, to any person obtaining a copy of
66
this software and associated documentation files (the "Software"), to deal in
@@ -21,100 +21,162 @@
2121
SOFTWARE.
2222
*/
2323

24-
require('../../php-lib/biopax2bio2rdf.php');
25-
24+
require_once(__DIR__.'/../../php-lib/biopax2bio2rdf.php');
2625
/**
2726
* Pathwaycommons RDFizer
28-
* @version 1.0
27+
* @version 2.0
2928
* @author Michel Dumontier
29+
* @author Alison Callahan
3030
* @description http://www.pathwaycommons.org
3131
*/
32-
class PathwaycommonsParser extends RDFFactory
32+
class PathwaycommonsParser extends Bio2RDFizer
3333
{
3434
function __construct($argv) {
35-
parent::__construct();
36-
$this->SetDefaultNamespace("pathwaycommons");
37-
38-
// set and print application parameters
39-
$this->AddParameter('files',true,'all|biogrid|cell-map|hprd|humancyc|imid|intact|mint|nci-nature|reactome','all','biopax OWL files to process');
40-
$this->AddParameter('indir',false,null,'/data/download/'.$this->GetNamespace().'/','directory to download into and parse from');
41-
$this->AddParameter('outdir',false,null,'/data/rdf/'.$this->GetNamespace().'/','directory to place rdfized files');
42-
$this->AddParameter('graph_uri',false,null,null,'provide the graph uri to generate n-quads instead of n-triples');
43-
$this->AddParameter('gzip',false,'true|false','true','gzip the output');
44-
$this->AddParameter('download',false,'true|false','false','set true to download files');
45-
$this->AddParameter('download_url',false,null,'http://www.pathwaycommons.org/pc-snapshot/current-release/biopax/by_source/');
46-
if($this->SetParameters($argv) == FALSE) {
47-
$this->PrintParameters($argv);
48-
exit;
49-
}
50-
if($this->CreateDirectory($this->GetParameterValue('indir')) === FALSE) exit;
51-
if($this->CreateDirectory($this->GetParameterValue('outdir')) === FALSE) exit;
52-
if($this->GetParameterValue('graph_uri')) $this->SetGraphURI($this->GetParameterValue('graph_uri'));
53-
54-
return TRUE;
35+
parent::__construct($argv, "pathwaycommons");
36+
parent::addParameter('files',true,'all|homo-sapiens|hprd|humancyc|nci-nature|panther-pathway|phosphositeplus|reactome','all','biopax OWL files to process');
37+
parent::addParameter('download_url',false,null,'http://www.pathwaycommons.org/pc2/downloads/');
38+
parent::initialize();
5539
}
5640

5741
function Run()
5842
{
5943
// get the work
6044
if($this->GetParameterValue('files') == 'all') {
61-
$sources = explode("|",$this->GetParameterList('files'));
45+
$sources = explode("|", parent::getParameterList('files'));
6246
array_shift($sources);
6347
} else {
6448
// comma separated list
65-
$sources = explode(",",$this->GetParameterValue('files'));
49+
$sources = explode(",", parent::getParameterValue('files'));
6650
}
6751

52+
$download_files = array(
53+
"homo-sapiens" => "Pathway%20Commons%202%20homo%20sapiens.BIOPAX.owl.gz",
54+
"hprd" => "Pathway%20Commons%202%20HPRD.BIOPAX.owl.gz",
55+
"humancyc" => "Pathway%20Commons%202%20HumanCyc.BIOPAX.owl.gz",
56+
"nci-nature" => "Pathway%20Commons%202%20NCI_Nature.BIOPAX.owl.gz",
57+
"panther-pathway" => "Pathway%20Commons%202%20PANTHER%20Pathway.BIOPAX.owl.gz",
58+
"phosphositeplus" => "Pathway%20Commons%202%20PhosphoSitePlus.BIOPAX.owl.gz",
59+
"reactome" => "Pathway%20Commons%202%20Reactome.BIOPAX.owl.gz",
60+
);
61+
62+
$graph_uri = parent::getGraphURI();
63+
if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());
64+
65+
$dataset_description = '';
66+
6867
// iterate over the requested data
6968
foreach($sources AS $source) {
70-
echo "processing $source...";
69+
echo "processing $source... ";
70+
71+
$ldir = parent::getParameterValue('indir');
72+
$odir = parent::getParameterValue('outdir');
73+
$rdir = parent::getParameterValue('download_url');
7174

7275
// set the remote and input files
7376
$file = $source.".owl";
74-
$zfile = $source.".owl.zip";
75-
$rfile = $this->GetParameterValue('download_url').$zfile;
76-
$lfile = $this->GetParameterValue('indir').$zfile;
77+
$zfile = $source.".owl.gz";
78+
$rfile = $rdir.$download_files[$source];
79+
$lfile = $ldir.$zfile;
7780

7881
// download if the file doesn't exist locally or we are told to
7982
if(!file_exists($lfile) || $this->GetParameterValue('download') == 'true') {
8083
// download
81-
echo "downloading..";
84+
echo "downloading... ";
8285
file_put_contents($lfile, file_get_contents($rfile));
8386
}
8487

8588
// decompress the gzip file
8689
// and load into a buffer
87-
echo 'extracting...';
88-
$zin = new ZipArchive();
89-
if ($zin->open($lfile) === FALSE) {
90-
trigger_error("Unable to open $lfile");
90+
echo 'extracting... ';
91+
92+
if (($fpin = gzopen($lfile, "r")) === FALSE) {
93+
trigger_error("Unable to open $lfile", E_USER_ERROR);
9194
exit;
9295
}
96+
9397
$data = '';
94-
$fpin = $zin->getStream($file);
95-
while($l = fgets($fpin)) $data .= $l;
96-
fclose($fpin);
98+
while (!gzeof($fpin)) {
99+
$buffer = gzgets($fpin, 4096);
100+
$data .= $buffer;
101+
}
102+
gzclose($fpin);
97103

98104
// set the output file
99-
$outfile = $this->GetParameterValue('outdir').$source.'nt';
105+
$suffix = parent::getParameterValue('output_format');
106+
$outfile = $source.'.'.$suffix;
107+
100108
$gz = false;
101-
if($this->GetParameterValue('graph_uri')) {$outfile = $this->GetParameterValue('outdir').$source.'nq';}
102-
if($this->GetParameterValue('gzip') == 'true') {
103-
$outfile .= '.gz';
109+
if(strstr(parent::getParameterValue('output_format'), "gz")) {
104110
$gz = true;
105111
}
106-
$this->SetWriteFile($outfile, $gz);
112+
113+
parent::setWriteFile($odir.$outfile, $gz);
107114

108-
// parse
109-
$this->Parse($data);
115+
// send for parsing
116+
$p = new BioPAX2Bio2RDF($this);
117+
$p->SetBuffer($data)
118+
->SetBioPAXVersion(3)
119+
->SetBaseNamespace("http://purl.org/pc2/3/")
120+
->SetBio2RDFNamespace("http://bio2rdf.org/pathwaycommons:")
121+
->SetDatasetURI(parent::getDatasetURI());
122+
$rdf = $p->Parse();
123+
parent::addRDF($rdf);
110124

111125
// write to output
112-
$this->WriteRDFBufferToWriteFile();
113-
$this->GetWriteFile()->Close();
126+
parent::writeRDFBufferToWriteFile();
127+
parent::getWriteFile()->Close();
128+
129+
echo "done!".PHP_EOL;
130+
131+
//generate dataset description
132+
echo "Generating dataset description for $zfile... ";
133+
$source_file = (new DataResource($this))
134+
->setURI($rfile)
135+
->setTitle("Pathway Commons")
136+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($lfile)))
137+
->setFormat("rdf/xml")
138+
->setPublisher("http://www.pathwaycommons.org/")
139+
->setHomepage("http://www.pathwaycommons.org/")
140+
->setRights("use")
141+
->setRights("restricted-by-source-license")
142+
->setLicense("http://www.pathwaycommons.org/pc2/home.html#data_sources")
143+
->setDataset("http://identifiers.org/pathwaycommons/");
114144

115-
echo PHP_EOL;
145+
$dataset_description .= $source_file->toRDF();
146+
echo "done!".PHP_EOL;
116147
}
117-
return TRUE;
148+
149+
echo "Generating dataset description for Bio2RDF Pathways Commons dataset... ";
150+
151+
$prefix = parent::getPrefix();
152+
$bVersion = parent::getParameterValue('bio2rdf_release');
153+
$date = date ("Y-m-d\TG:i:s\Z");
154+
$output_file = (new DataResource($this))
155+
->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/")
156+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
157+
->setSource($source_file->getURI())
158+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/pathwaycommons/pathwaycommons.php")
159+
->setCreateDate($date)
160+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
161+
->setPublisher("http://bio2rdf.org")
162+
->setRights("use-share-modify")
163+
->setRights("by-attribution")
164+
->setRights("restricted-by-source-license")
165+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
166+
->setDataset(parent::getDatasetURI());
167+
168+
if($gz) $output_file->setFormat("application/gzip");
169+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
170+
else $output_file->setFormat("application/n-quads");
171+
172+
$dataset_description .= $output_file->toRDF();
173+
174+
//write dataset description to file
175+
parent::setGraphURI($graph_uri);
176+
parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
177+
parent::getWriteFile()->write($dataset_description);
178+
parent::getWriteFile()->close();
179+
echo "done!".PHP_EOL;
118180
}
119181

120182

0 commit comments

Comments
 (0)