Skip to content

Commit 68f7111

Browse files
Merge pull request #277 from alisoncallahan/sgd-r3datadesc
Sgd r3datadesc
2 parents dfd6b38 + 3a5270b commit 68f7111

File tree

1 file changed

+122
-57
lines changed

1 file changed

+122
-57
lines changed

sgd/sgd.php

+122-57
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,19 @@ function __construct($argv) {
4545

4646
function Run(){
4747

48+
if(parent::getParameterValue('download') === true)
49+
{
50+
$this->download();
51+
}
52+
if(parent::getParameterValue('process') === true)
53+
{
54+
$this->process();
55+
}
56+
57+
}
58+
59+
function download(){
60+
4861
if(parent::getParameterValue('files') == 'all') {
4962
$files = explode("|",parent::getParameterList('files'));
5063
array_shift($files);
@@ -53,17 +66,7 @@ function Run(){
5366
}
5467

5568
$ldir = parent::getParameterValue('indir');
56-
$odir = parent::getParameterValue('outdir');
5769
$rdir = parent::getParameterValue('download_url');
58-
59-
//make sure directories end with slash
60-
if(substr($ldir, -1) !== "/"){
61-
$ldir = $ldir."/";
62-
}
63-
64-
if(substr($odir, -1) !== "/"){
65-
$odir = $odir."/";
66-
}
6770

6871
$rfiles = array(
6972
"dbxref" => "curation/chromosomal_feature/dbxref.tab",
@@ -87,68 +90,141 @@ function Run(){
8790
} elseif($ext = "gz"){
8891
$lfile = $ldir."sgd_".$file.".tab.gz";
8992
}
90-
91-
if(!file_exists($lfile) && parent::getParameterValue('download') == false) {
92-
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
93-
parent::setParameterValue('download',true);
94-
}
9593

9694
//download all files [except mapping file]
97-
if(parent::getParameterValue('download') == true && $file !== "mapping") {
95+
if($file !== "mapping") {
9896
$rfile = $rdir.$rfiles[$file];
99-
echo "downloading $file ... ";
97+
echo "Downloading $file ... ";
10098
Utils::DownloadSingle ($rfile, $lfile);
10199
}
100+
}
101+
}
102102

103-
$ofile = $odir."sgd_".$file.'.nt';
103+
function process(){
104+
if(parent::getParameterValue('files') == 'all') {
105+
$files = explode("|",parent::getParameterList('files'));
106+
array_shift($files);
107+
} else {
108+
$files = explode(",",parent::getParameterValue('files'));
109+
}
110+
111+
$ldir = parent::getParameterValue('indir');
112+
$rdir = parent::getParameterValue('download_url');
113+
$odir = parent::getParameterValue('outdir');
114+
115+
$rfiles = array(
116+
"dbxref" => "curation/chromosomal_feature/dbxref.tab",
117+
"features" => "curation/chromosomal_feature/SGD_features.tab",
118+
"domains" => "curation/calculated_protein_info/domains/domains.tab",
119+
"protein" => "curation/calculated_protein_info/protein_properties.tab",
120+
"goa" => "curation/literature/gene_association.sgd.gz",
121+
"goslim" => "curation/literature/go_slim_mapping.tab",
122+
"complex" => "curation/literature/go_protein_complex_slim.tab",
123+
"interaction" => "curation/literature/interaction_data.tab",
124+
"phenotype" => "curation/literature/phenotype_data.tab",
125+
"pathways" => "curation/literature/biochemical_pathways.tab",
126+
"mapping" => "mapping"
127+
);
128+
129+
$graph_uri = parent::getGraphURI();
130+
if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());
131+
132+
$dataset_description = '';
133+
134+
foreach($files as $file){
135+
136+
$ext = substr(strrchr($rfiles[$file], '.'), 1);
137+
if($ext == "tab"){
138+
$lfile = "sgd_".$file.".tab";
139+
} elseif($ext = "gz"){
140+
$lfile = "sgd_".$file.".tab.gz";
141+
}
142+
143+
$rfile = $rdir.$rfiles[$file];
144+
145+
if(!file_exists($lfile) && parent::getParameterValue('download') == false) {
146+
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
147+
148+
Utils::DownloadSingle ($rfile, $ldir.$lfile);
149+
}
150+
151+
$ofile = "sgd_".$file.'.nt';
152+
104153
$gz=false;
105-
if($this->GetParameterValue('graph_uri')) {$ofile = $odir."sgd_".$file.'.nq'; }
154+
155+
if($this->GetParameterValue('graph_uri')) {$ofile = "sgd_".$file.'.nq'; }
106156

107157
if(strstr(parent::getParameterValue('output_format'), "gz")) {
108158
$ofile .= '.gz';
109159
$gz = true;
110160
}
111161

112-
parent::setWriteFile($ofile, $gz);
162+
parent::setWriteFile($odir.$ofile, $gz);
113163

114164
//parse file
115-
if($ext !== "gz"){
116-
parent::setReadFile($lfile, FALSE);
117-
} else {
118-
parent::setReadFile($lfile, TRUE);
119-
}
165+
parent::setReadFile($ldir.$lfile, $gz);
120166

121167
$fnx = $file;
122-
echo "processing $file... ";
168+
echo "Processing $file... ";
123169
$this->$fnx();
124-
echo "done!";
170+
echo PHP_EOL."done!";
125171

126172
//write RDF to file
127173
parent::writeRDFBufferToWriteFile();
128174

129175
//close write file
130176
parent::getWriteFile()->close();
131177
echo PHP_EOL;
178+
179+
// generate the dataset release file
180+
echo "Generating dataset description... ".PHP_EOL;
181+
// dataset description
182+
$source_file = (new DataResource($this))
183+
->setURI($rfile)
184+
->setTitle("Saccharomyces Genome Database ($file)")
185+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($lfile)))
186+
->setFormat("text/tab-separated-value")
187+
->setFormat("application/gzip")
188+
->setPublisher("http://www.yeastgenome.org/")
189+
->setHomepage("http://www.yeastgenome.org/")
190+
->setRights("use")
191+
->setLicense("http://www.stanford.edu/site/terms.html")
192+
->setDataset("http://identifiers.org/sgd/");
193+
194+
$prefix = parent::getPrefix();
195+
$bVersion = parent::getParameterValue('bio2rdf_release');
196+
$date = date ("Y-m-d\TG:i:s\Z");
197+
$output_file = (new DataResource($this))
198+
->setURI("http://download.bio2df.org/release/$bVersion/$prefix/$ofile")
199+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
200+
->setSource($source_file->getURI())
201+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php")
202+
->setCreateDate($date)
203+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
204+
->setPublisher("http://bio2rdf.org")
205+
->setRights("use-share-modify")
206+
->setRights("by-attribution")
207+
->setRights("restricted-by-source-license")
208+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
209+
->setDataset(parent::getDatasetURI());
210+
211+
if($gz) $output_file->setFormat("application/gzip");
212+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
213+
else $output_file->setFormat("application/n-quads");
214+
215+
$dataset_description .= $source_file->toRDF().$output_file->toRDF();
132216

133217
}//foreach
134218

135-
// generate the dataset release file
136-
echo "generating dataset release file... ";
137-
$desc = parent::getBio2RDFDatasetDescription(
138-
$this->getPrefix(),
139-
"https://github.com/bio2rdf/bio2rdf-scripts/blob/master/sgd/sgd.php",
140-
$this->getBio2RDFDownloadURL($this->getNamespace()),
141-
"http://yeastgenome.org",
142-
array("use"),
143-
"http://yeastgenome.org",
144-
parent::getParameterValue('download_url'),
145-
parent::getDatasetVersion()
146-
);
147-
$this->setWriteFile($odir.$this->getBio2RDFReleaseFile($this->getNamespace()));
148-
$this->getWriteFile()->write($desc);
149-
$this->getWriteFile()->close();
219+
//set graph URI back to default
220+
parent::setGraphURI($graph_uri);
221+
222+
//write dataset description to file
223+
parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
224+
parent::getWriteFile()->write($dataset_description);
225+
parent::getWriteFile()->close();
150226
echo "done!".PHP_EOL;
151-
227+
152228
}
153229

154230
function dbxref(){
@@ -732,7 +808,7 @@ function interaction(){
732808

733809
$apoin = fopen($apofile, "r");
734810
if($apoin === FALSE) {
735-
trigger_error("Unable to open $apofile");
811+
trigger_error("Unable to open $apofile", E_USER_ERROR);
736812
exit;
737813
}
738814
$terms = OBOParser($apoin);
@@ -816,7 +892,7 @@ function phenotype(){
816892

817893
$apoin = fopen($apofile, "r");
818894
if($apoin === FALSE) {
819-
trigger_error("Unable to open $apofile");
895+
trigger_error("Unable to open $apofile", E_USER_ERROR);
820896
exit;
821897
}
822898
$terms = OBOParser($apoin);
@@ -869,7 +945,7 @@ function phenotype(){
869945
parent::describeProperty($this->getVoc()."experiment-type", "Relationship between an SGD experiment and the experiment type")
870946
);
871947
} else {
872-
trigger_error("No match for experiment type $label");
948+
trigger_error("No match for experiment type $label", E_USER_WARNING);
873949
}
874950

875951
// mutant type [6]
@@ -1202,16 +1278,5 @@ function GetLatestNCBOOntology($ontology_id,$apikey,$target_filepath){
12021278
Utils::DownloadSingle('http://rest.bioontology.org/bioportal/virtual/download/'.$ontology_id.'?apikey='.$apikey, $target_filepath);
12031279
}
12041280
}//SGDParser
1205-
$start = microtime(true);
1206-
1207-
set_error_handler('error_handler');
1208-
$parser = new SGDParser($argv);
1209-
$parser->Run();
1210-
1211-
$end = microtime(true);
1212-
$time_taken = $end - $start;
1213-
print "Started: ".date("l jS F \@ g:i:s a", $start)."\n";
1214-
print "Finished: ".date("l jS F \@ g:i:s a", $end)."\n";
1215-
print "Took: ".$time_taken." seconds\n"
12161281

12171282
?>

0 commit comments

Comments
 (0)