Skip to content

Commit efb4e35

Browse files
Added dataset description to SGD; removed run lines at end of parser; separated Run() function into download() and process()
1 parent d255500 commit efb4e35

File tree

1 file changed

+130
-51
lines changed

1 file changed

+130
-51
lines changed

sgd/sgd.php

+130-51
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,19 @@ function __construct($argv) {
4545

4646
/**
 * Entry point of the parser.
 *
 * Runs the download stage and then the process stage, in that order.
 * A stage is executed only when the parameter of the same name
 * ('download' / 'process') is strictly boolean true.
 */
function Run(){
	$stages = array('download', 'process');

	foreach($stages as $stage){
		// a stage is opt-in: anything other than boolean true skips it
		if(parent::getParameterValue($stage) !== true){
			continue;
		}
		$this->$stage();
	}
}
58+
59+
/**
 * Download the requested SGD source files into the local input directory.
 *
 * The list of file keys is taken from the 'files' parameter: the value
 * 'all' expands to the parameter's option list (pipe-separated, first
 * entry dropped); any other value is split on commas. Each key is looked
 * up in the table of remote paths, prefixed with 'download_url', and
 * fetched with Utils::DownloadSingle() to "<indir>/sgd_<key>.tab", or
 * "<indir>/sgd_<key>.tab.gz" when the remote file is gzipped.
 *
 * The "mapping" key has no remote file and is skipped. Keys that are not
 * in the table, or whose remote file has an unexpected extension, raise
 * an E_USER_WARNING and are skipped instead of triggering a bogus download.
 */
function download(){
	// resolve the list of file keys to fetch
	if(parent::getParameterValue('files') == 'all') {
		$files = explode("|",parent::getParameterList('files'));
		// drop the first option-list entry (presumably the 'all' keyword itself — verify against the parameter definition)
		array_shift($files);
	} else {
		$files = explode(",",parent::getParameterValue('files'));
	}

	$ldir = parent::getParameterValue('indir');
	$rdir = parent::getParameterValue('download_url');

	//make sure the local directory ends with a slash
	if(substr($ldir, -1) !== "/"){
		$ldir = $ldir."/";
	}

	// file key => path relative to the download URL
	$rfiles = array(
		"dbxref" => "curation/chromosomal_feature/dbxref.tab",
		"features" => "curation/chromosomal_feature/SGD_features.tab",
		"domains" => "curation/calculated_protein_info/domains/domains.tab",
		"protein" => "curation/calculated_protein_info/protein_properties.tab",
		"goa" => "curation/literature/gene_association.sgd.gz",
		"goslim" => "curation/literature/go_slim_mapping.tab",
		"complex" => "curation/literature/go_protein_complex_slim.tab",
		"interaction" => "curation/literature/interaction_data.tab",
		"phenotype" => "curation/literature/phenotype_data.tab",
		"pathways" => "curation/literature/biochemical_pathways.tab",
		"mapping" => "mapping"
	);

	foreach($files as $file){

		//download all files [except mapping file]
		if($file === "mapping") {
			continue;
		}

		// an unrecognised key would otherwise resolve to the bare download URL
		if(!isset($rfiles[$file])) {
			trigger_error("Unknown SGD file '$file'; skipping download.", E_USER_WARNING);
			continue;
		}

		// pick the local file name from the remote file's extension
		$ext = pathinfo($rfiles[$file], PATHINFO_EXTENSION);
		if($ext === "tab"){
			$lfile = $ldir."sgd_".$file.".tab";
		} elseif($ext === "gz"){
			$lfile = $ldir."sgd_".$file.".tab.gz";
		} else {
			trigger_error("Unexpected extension '$ext' for SGD file '$file'; skipping download.", E_USER_WARNING);
			continue;
		}

		$rfile = $rdir.$rfiles[$file];
		echo "Downloading $file ... ";
		Utils::DownloadSingle ($rfile, $lfile);
	}
}
107+
108+
function process(){
109+
if(parent::getParameterValue('files') == 'all') {
110+
$files = explode("|",parent::getParameterList('files'));
111+
array_shift($files);
112+
} else {
113+
$files = explode(",",parent::getParameterValue('files'));
114+
}
115+
116+
$ldir = parent::getParameterValue('indir');
117+
$rdir = parent::getParameterValue('download_url');
118+
$odir = parent::getParameterValue('outdir');
119+
59120
//make sure directories end with slash
60121
if(substr($ldir, -1) !== "/"){
61122
$ldir = $ldir."/";
@@ -79,76 +140,105 @@ function Run(){
79140
"mapping" => "mapping"
80141
);
81142

143+
$graph_uri = parent::getGraphURI();
144+
if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());
145+
146+
$dataset_description = '';
147+
82148
foreach($files as $file){
83149

84150
$ext = substr(strrchr($rfiles[$file], '.'), 1);
85151
if($ext == "tab"){
86-
$lfile = $ldir."sgd_".$file.".tab";
152+
$lfile = "sgd_".$file.".tab";
87153
} elseif($ext = "gz"){
88-
$lfile = $ldir."sgd_".$file.".tab.gz";
154+
$lfile = "sgd_".$file.".tab.gz";
89155
}
90156

157+
$rfile = $rdir.$rfiles[$file];
158+
91159
if(!file_exists($lfile) && parent::getParameterValue('download') == false) {
92160
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
93-
parent::setParameterValue('download',true);
161+
162+
Utils::DownloadSingle ($rfile, $ldir.$lfile);
94163
}
95164

96-
//download all files [except mapping file]
97-
if(parent::getParameterValue('download') == true && $file !== "mapping") {
98-
$rfile = $rdir.$rfiles[$file];
99-
echo "downloading $file ... ";
100-
Utils::DownloadSingle ($rfile, $lfile);
101-
}
102-
103-
$ofile = $odir."sgd_".$file.'.nt';
165+
$ofile = "sgd_".$file.'.nt';
166+
104167
$gz=false;
105-
if($this->GetParameterValue('graph_uri')) {$ofile = $odir."sgd_".$file.'.nq'; }
168+
169+
if($this->GetParameterValue('graph_uri')) {$ofile = "sgd_".$file.'.nq'; }
106170

107171
if(strstr(parent::getParameterValue('output_format'), "gz")) {
108172
$ofile .= '.gz';
109173
$gz = true;
110174
}
111175

112-
parent::setWriteFile($ofile, $gz);
176+
parent::setWriteFile($odir.$ofile, $gz);
113177

114178
//parse file
115-
if($ext !== "gz"){
116-
parent::setReadFile($lfile, FALSE);
117-
} else {
118-
parent::setReadFile($lfile, TRUE);
119-
}
179+
parent::setReadFile($ldir.$lfile, $gz);
120180

121181
$fnx = $file;
122-
echo "processing $file... ";
182+
echo "Processing $file... ";
123183
$this->$fnx();
124-
echo "done!";
184+
echo PHP_EOL."done!";
125185

126186
//write RDF to file
127187
parent::writeRDFBufferToWriteFile();
128188

129189
//close write file
130190
parent::getWriteFile()->close();
131191
echo PHP_EOL;
192+
193+
// generate the dataset release file
194+
echo "Generating dataset description... ".PHP_EOL;
195+
// dataset description
196+
$source_file = (new DataResource($this))
197+
->setURI($rfile)
198+
->setTitle("Saccharomyces Genome Database ($file)")
199+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($lfile)))
200+
->setFormat("text/tab-separated-value")
201+
->setFormat("application/gzip")
202+
->setPublisher("http://www.yeastgenome.org/")
203+
->setHomepage("http://www.yeastgenome.org/")
204+
->setRights("use")
205+
->setLicense("http://www.stanford.edu/site/terms.html")
206+
->setDataset("http://identifiers.org/sgd/");
207+
208+
$prefix = parent::getPrefix();
209+
$bVersion = parent::getParameterValue('bio2rdf_release');
210+
$date = date ("Y-m-d\TG:i:s\Z");
211+
$output_file = (new DataResource($this))
212+
->setURI("http://download.bio2df.org/release/$bVersion/$prefix/$ofile")
213+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
214+
->setSource($source_file->getURI())
215+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
216+
->setCreateDate($date)
217+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
218+
->setPublisher("http://bio2rdf.org")
219+
->setRights("use-share-modify")
220+
->setRights("by-attribution")
221+
->setRights("restricted-by-source-license")
222+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
223+
->setDataset(parent::getDatasetURI());
224+
225+
if($gz) $output_file->setFormat("application/gzip");
226+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
227+
else $output_file->setFormat("application/n-quads");
228+
229+
$dataset_description .= $source_file->toRDF().$output_file->toRDF();
132230

133231
}//foreach
134232

135-
// generate the dataset release file
136-
echo "generating dataset release file... ";
137-
$desc = parent::getBio2RDFDatasetDescription(
138-
$this->getPrefix(),
139-
"https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php",
140-
$this->getBio2RDFDownloadURL($this->getNamespace()),
141-
"http://yeastgenome.org",
142-
array("use"),
143-
"http://yeastgenome.org",
144-
parent::getParameterValue('download_url'),
145-
parent::getDatasetVersion()
146-
);
147-
$this->setWriteFile($odir.$this->getBio2RDFReleaseFile($this->getNamespace()));
148-
$this->getWriteFile()->write($desc);
149-
$this->getWriteFile()->close();
233+
//set graph URI back to default
234+
parent::setGraphURI($graph_uri);
235+
236+
//write dataset description to file
237+
parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
238+
parent::getWriteFile()->write($dataset_description);
239+
parent::getWriteFile()->close();
150240
echo "done!".PHP_EOL;
151-
241+
152242
}
153243

154244
function dbxref(){
@@ -732,7 +822,7 @@ function interaction(){
732822

733823
$apoin = fopen($apofile, "r");
734824
if($apoin === FALSE) {
735-
trigger_error("Unable to open $apofile");
825+
trigger_error("Unable to open $apofile", E_USER_ERROR);
736826
exit;
737827
}
738828
$terms = OBOParser($apoin);
@@ -816,7 +906,7 @@ function phenotype(){
816906

817907
$apoin = fopen($apofile, "r");
818908
if($apoin === FALSE) {
819-
trigger_error("Unable to open $apofile");
909+
trigger_error("Unable to open $apofile", E_USER_ERROR);
820910
exit;
821911
}
822912
$terms = OBOParser($apoin);
@@ -869,7 +959,7 @@ function phenotype(){
869959
parent::describeProperty($this->getVoc()."experiment-type", "Relationship between an SGD experiment and the experiment type")
870960
);
871961
} else {
872-
trigger_error("No match for experiment type $label");
962+
trigger_error("No match for experiment type $label", E_USER_WARNING);
873963
}
874964

875965
// mutant type [6]
@@ -1202,16 +1292,5 @@ function GetLatestNCBOOntology($ontology_id,$apikey,$target_filepath){
12021292
Utils::DownloadSingle('http://rest.bioontology.org/bioportal/virtual/download/'.$ontology_id.'?apikey='.$apikey, $target_filepath);
12031293
}
12041294
}//SGDParser
1205-
$start = microtime(true);
1206-
1207-
set_error_handler('error_handler');
1208-
$parser = new SGDParser($argv);
1209-
$parser->Run();
1210-
1211-
$end = microtime(true);
1212-
$time_taken = $end - $start;
1213-
print "Started: ".date("l jS F \@ g:i:s a", $start)."\n";
1214-
print "Finished: ".date("l jS F \@ g:i:s a", $end)."\n";
1215-
print "Took: ".$time_taken." seconds\n"
12161295

12171296
?>

0 commit comments

Comments
 (0)