Skip to content

Commit 7570115

Browse files
Merge branch 'release3' of http://github.com/bio2rdf/bio2rdf-scripts into ct_onefile
2 parents 3c272ab + 9a36fb7 commit 7570115

File tree

6 files changed

+1207
-747
lines changed

6 files changed

+1207
-747
lines changed

ctd/ctd.php

+142-79
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,17 @@ function __construct($argv) {
4242

4343
function Run()
4444
{
45+
if(parent::getParameterValue('download') === true)
46+
{
47+
$this->download();
48+
}
49+
if(parent::getParameterValue('process') === true)
50+
{
51+
$this->process();
52+
}
53+
}
54+
55+
function download(){
4556

4657
// get the file list
4758
if(parent::getParameterValue('files') == 'all') {
@@ -53,92 +64,157 @@ function Run()
5364

5465
//set directory values
5566
$ldir = parent::getParameterValue('indir');
56-
$odir = parent::getParameterValue('outdir');
5767
$rdir = parent::getParameterValue('download_url');
5868

5969
//make sure directories end with slash
6070
if(substr($ldir, -1) !== "/"){
6171
$ldir = $ldir."/";
6272
}
63-
73+
74+
$gz_suffix = ".gz";
75+
76+
foreach($files AS $file) {
77+
if($file == 'chem_gene_ixn_types') $suffix = '.tsv';
78+
else if($file == 'exposure_ontology') $suffix = '.obo';
79+
else $suffix = ".tsv.gz";
80+
$lfile = $ldir.$file.$gz_suffix;
81+
$rfile = $rdir.'CTD_'.$file.$suffix;
82+
if($suffix == ".tsv.gz") {
83+
Utils::DownloadSingle ($rfile, $lfile);
84+
} else {
85+
Utils::DownloadSingle ($rfile, "compress.zlib://".$lfile);
86+
}
87+
}
88+
}
89+
90+
function process(){
91+
// get the file list
92+
if(parent::getParameterValue('files') == 'all') {
93+
$files = explode("|",parent::getParameterList('files'));
94+
array_shift($files);
95+
} else {
96+
$files = explode(",",parent::getParameterValue('files'));
97+
}
98+
99+
$dataset_description = '';
100+
101+
//set directory values
102+
$ldir = parent::getParameterValue('indir');
103+
$rdir = parent::getParameterValue('download_url');
104+
$odir = parent::getParameterValue('outdir');
105+
106+
//make sure input and output directories end with slash
107+
if(substr($ldir, -1) !== "/"){
108+
$ldir = $ldir."/";
109+
}
110+
64111
if(substr($odir, -1) !== "/"){
65112
$odir = $odir."/";
66113
}
67114

68-
$gz_suffix = ".gz";
115+
$graph_uri = parent::getGraphURI();
116+
if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());
69117

70-
foreach($files AS $file) {
118+
$gz_suffix = ".gz";
119+
120+
foreach($files as $file){
121+
if($file == 'chem_gene_ixn_types') $suffix = '.tsv';
122+
else if($file == 'exposure_ontology') $suffix = '.obo';
123+
else $suffix = ".tsv.gz";
71124

72125
$lfile = $ldir.$file.$gz_suffix;
73-
$ofile = $odir."ctd_".$file.".nt";
126+
$rfile = $rdir.'CTD_'.$file.$suffix;
127+
$ofile = "ctd_".$file.".nt";
74128
$gz = false;
75129

76-
if($this->GetParameterValue('graph_uri')) {
77-
$ofile = $odir."ctd_".$file.'.nq';
78-
}
79-
80-
if(strstr(parent::getParameterValue('output_format'), "gz")) {
81-
$ofile .= '.gz';
82-
$gz = true;
83-
}
84-
85130
if(!file_exists($lfile)) {
86131
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
87-
$this->SetParameterValue('download',true);
88-
}
89-
90-
if(parent::getParameterValue('download') == true) {
91-
if($file == 'chem_gene_ixn_types') $suffix = '.tsv';
92-
else if($file == 'exposure_ontology') $suffix = '.obo';
93-
else $suffix = ".tsv.gz";
94-
95-
$rfile = $rdir.'CTD_'.$file.$suffix;
96132
if($suffix == ".tsv.gz") {
97133
Utils::DownloadSingle ($rfile, $lfile);
98134
} else {
99135
Utils::DownloadSingle ($rfile, "compress.zlib://".$lfile);
100136
}
101137
}
102138

139+
if($this->GetParameterValue('graph_uri')) {
140+
$ofile = "ctd_".$file.'.nq';
141+
}
142+
143+
if(strstr(parent::getParameterValue('output_format'), "gz")) {
144+
$ofile .= '.gz';
145+
$gz = true;
146+
}
147+
103148
echo "Processing ".$file." ...";
104-
parent::setWriteFile($ofile, $gz);
149+
parent::setWriteFile($odir.$ofile, $gz);
105150

106151
//set read file
107152
parent::setReadFile($lfile, TRUE);
108153

109154
$fnx = "CTD_".$file;
110-
if($this->$fnx() === FALSE) {
111-
trigger_error("Error in $fnx");
112-
exit;
113-
}
155+
$this->$fnx();
114156

115-
//write RDF to file
116-
parent::writeRDFBufferToWriteFile();
117-
118157
//close write file
119158
parent::getWriteFile()->close();
120-
echo "Done!".PHP_EOL;
121-
122-
}//foreach
123-
124-
// generate the dataset release file
125-
echo "generating dataset release file... ";
126-
$desc = parent::getBio2RDFDatasetDescription(
127-
$this->getPrefix(),
128-
"https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php",
129-
$this->getBio2RDFDownloadURL($this->getNamespace()),
130-
"http://ctdbase.org",
131-
array("use", "no-commercial"),
132-
"http://ctdbase.org/about/legal.jsp",
133-
parent::getParameterValue('download_url'),
134-
parent::getDatasetVersion()
135-
);
136-
$this->setWriteFile($odir.$this->getBio2RDFReleaseFile($this->getNamespace()));
137-
$this->getWriteFile()->write($desc);
138-
$this->getWriteFile()->close();
159+
echo "done!".PHP_EOL;
160+
161+
// generate the dataset release file
162+
echo "Generating dataset description... ";
163+
164+
if($file == "chemicals"){
165+
$dataset = "http://identifiers.org/ctd.chemical/";
166+
} else if($file == "diseases"){
167+
$dataset = "http://identifiers.org/ctd.disease/";
168+
} else if ($file == "genes"){
169+
$dataset = "http://identifiers.org/ctd.gene/";
170+
} else {
171+
$dataset = null;
172+
}
173+
// dataset description
174+
$source_file = (new DataResource($this))
175+
->setURI($rfile)
176+
->setTitle("Comparative Toxicogenomics Database ($file.$gz_suffix")
177+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($lfile)))
178+
->setFormat("text/tab-separated-value")
179+
->setFormat("application/gzip")
180+
->setPublisher("http://ctdbase.org/")
181+
->setHomepage("http://ctdbase.org/")
182+
->setRights("use")
183+
->setRights("by-attribution")
184+
->setRights("no-commercial")
185+
->setLicense("http://ctdbase.org/about/legal.jsp")
186+
->setDataset($dataset);
187+
188+
$prefix = parent::getPrefix();
189+
$bVersion = parent::getParameterValue('bio2rdf_release');
190+
$date = date ("Y-m-d\TG:i:s\Z");
191+
$output_file = (new DataResource($this))
192+
->setURI("http://download.bio2df.org/release/$bVersion/$prefix/$ofile")
193+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
194+
->setSource($source_file->getURI())
195+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/ctd/ctd.php")
196+
->setCreateDate($date)
197+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
198+
->setPublisher("http://bio2rdf.org")
199+
->setRights("use-share-modify")
200+
->setRights("by-attribution")
201+
->setRights("restricted-by-source-license")
202+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
203+
->setDataset(parent::getDatasetURI());
204+
205+
if($gz) $output_file->setFormat("application/gzip");
206+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
207+
else $output_file->setFormat("application/n-quads");
208+
209+
$dataset_description .= $source_file->toRDF().$output_file->toRDF();
210+
}
211+
212+
parent::setGraphURI($graph_uri);
213+
parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
214+
parent::getWriteFile()->write($dataset_description);
215+
parent::getWriteFile()->close();
139216
echo "done!".PHP_EOL;
140-
141-
return TRUE;
217+
142218
}
143219

144220

@@ -162,7 +238,7 @@ function CTD_chemicals()
162238

163239
if($first) {
164240
if(($c = count($a) != 8)) {
165-
trigger_error("Expecting 8 fields, found $c!");return FALSE;
241+
trigger_error("CTD_chemicals function expects 8 fields, found $c!".PHP_EOL, E_USER_WARNING);
166242
}
167243
$first = false;
168244
}
@@ -209,7 +285,7 @@ function CTD_chem_gene_ixns()
209285

210286
if($first) {
211287
if(($c = count($a)) != 11) {
212-
trigger_error("Expecting 11 fields, found $c!");return FALSE;
288+
trigger_error("CTD_chem_gene_ixns function expects 11 fields, found $c!".PHP_EOL, E_USER_WARNING);
213289
}
214290
$first = false;
215291
}
@@ -287,7 +363,7 @@ function CTD_chemicals_diseases()
287363

288364
if($first) {
289365
if(($c = count($a)) != 10) {
290-
trigger_error("Expecting 10 fields, found $c!");return FALSE;
366+
trigger_error("CTD_chemicals_diseases function expects 10 fields, found $c!".PHP_EOL, E_USER_WARNING);
291367
}
292368
$first = false;
293369
}
@@ -357,7 +433,7 @@ function CTD_chem_pathways_enriched()
357433
$a = explode("\t",trim($l));
358434
if($first) {
359435
if(($c = count(explode("\t",$l))) != 11) {
360-
trigger_error("Expecting 11 fields, found $c!");
436+
trigger_error("CTD_chem_pathways_enriched function expects 11 fields, found $c!".PHP_EOL, E_USER_WARNING);
361437
return FALSE;
362438
}
363439
$first = false;
@@ -400,7 +476,7 @@ function CTD_diseases()
400476
// check number of columns
401477
if($first) {
402478
if(($c = count(explode("\t",$l))) != 9) {
403-
trigger_error("Expecting 9 fields, found $c!");
479+
trigger_error("CTD_diseases function expects 9 fields, found $c!".PHP_EOL, E_USER_WARNING);
404480
return FALSE;
405481
}
406482
$first = false;
@@ -436,7 +512,7 @@ function CTD_diseases_pathways()
436512
// check number of columns
437513
if($first) {
438514
if(($c = count(explode("\t",$l))) != 5) {
439-
trigger_error("Expecting 5 fields, found $c!");
515+
trigger_error("CTD_diseases_pathways function expects 5 fields, found $c!".PHP_EOL, E_USER_WARNING);
440516
return FALSE;
441517
}
442518
$first = false;
@@ -478,7 +554,7 @@ function CTD_genes_diseases()
478554
// check number of columns
479555
if($first) {
480556
if(($c = count(explode("\t",$l))) != 9) {
481-
trigger_error("Expecting 9 fields, found $c!");
557+
trigger_error("CTD_genes_diseases function expects 9 fields, found $c!".PHP_EOL, E_USER_WARNING);
482558
return FALSE;
483559
}
484560
$first = false;
@@ -537,7 +613,7 @@ function CTD_genes_pathways()
537613
// check number of columns
538614
if($first) {
539615
if(($c = count(explode("\t",$l))) != 4) {
540-
trigger_error("Expecting 4 fields, found $c!");
616+
trigger_error("CTD_genes_pathways function expects 4 fields, found $c!".PHP_EOL, E_USER_WARNING);
541617
return FALSE;
542618
}
543619
$first = false;
@@ -573,7 +649,7 @@ function CTD_Pathways()
573649
// check number of columns
574650
if($first) {
575651
if(($c = count(explode("\t",$l))) != 2) {
576-
trigger_error("Expecting 2 fields, found $c!");
652+
trigger_error("CTD_pathways function expects 2 fields, found $c!".PHP_EOL, E_USER_WARNING);
577653
return FALSE;
578654
}
579655
$first = false;
@@ -605,16 +681,16 @@ function CTD_Genes()
605681
while($l = $this->GetReadFile()->Read()) {
606682
if($l[0] == '#') continue;
607683
$a = explode("\t",$l);
608-
684+
609685
// check number of columns
610686
if($first) {
611687
if(($c = count(explode("\t",$l))) != 5) {
612-
trigger_error("Expecting 5 fields, found $c!");
688+
trigger_error("CTD_genes function expects 5 fields, found $c!".PHP_EOL, E_USER_WARNING);
613689
return FALSE;
614690
}
615691
$first = false;
616692
}
617-
693+
618694
$symbol = str_replace(array("\\/"),array('|'),$a[0]);
619695
$label = str_replace("\\+/",'+',$a[1]);
620696
$geneid = $a[2];
@@ -656,7 +732,7 @@ function CTD_chem_go_enriched()
656732
// check number of columns
657733
if($first) {
658734
if(($c = count(explode("\t",$l))) != 13) {
659-
trigger_error("Expecting 13 fields, found $c!");
735+
trigger_error("CTD_chem_go_enriched function expects 13 fields, found $c!".PHP_EOL, E_USER_WARNING);
660736
return FALSE;
661737
}
662738
$first = false;
@@ -693,7 +769,7 @@ function CTD_chem_gene_ixn_types()
693769
// check number of columns
694770
if($first) {
695771
if(($c = count(explode("\t",$l))) != 4) {
696-
trigger_error("Expecting 4 fields, found $c!");
772+
trigger_error("CTD_chem_gene_ixn_types function expects 4 fields, found $c!".PHP_EOL, E_USER_WARNING);
697773
return FALSE;
698774
}
699775
$first = false;
@@ -717,17 +793,4 @@ function CTD_chem_gene_ixn_types()
717793

718794
} // end class
719795

720-
$start = microtime(true);
721-
722-
set_error_handler('error_handler');
723-
$parser = new CTDParser($argv);
724-
$parser->Run();
725-
726-
$end = microtime(true);
727-
$time_taken = $end - $start;
728-
print "Started: ".date("l jS F \@ g:i:s a", $start)."\n";
729-
print "Finished: ".date("l jS F \@ g:i:s a", $end)."\n";
730-
print "Took: ".$time_taken." seconds\n"
731-
732-
733796
?>

0 commit comments

Comments
 (0)