Skip to content

Commit f2e155f

Browse files
Merge pull request #278 from alisoncallahan/gene-r3datadesc
Gene r3datadesc
2 parents 9886cea + cf5eb8d commit f2e155f

File tree

1 file changed

+106
-41
lines changed

1 file changed

+106
-41
lines changed

gene/entrez_gene.php

+106-41
Original file line numberDiff line numberDiff line change
@@ -58,22 +58,62 @@ function __construct($argv) {
5858
}//constructor
5959

6060
/**
 * Entry point of the parser: runs the requested stages in a fixed order.
 *
 * A stage is executed only when its parameter of the same name
 * ('download', then 'process') is strictly boolean true.
 */
function Run(){
	// stage name doubles as the parameter name and the method name
	foreach(array('download', 'process') as $stage){
		if(parent::getParameterValue($stage) === true){
			$this->$stage();
		}
	}
}//run
70+
71+
/**
 * Download the selected NCBI Gene source files into the local input directory.
 *
 * Parameters read: 'indir' (local directory prefix), 'download_url' (remote
 * prefix) and 'files' (the literal 'all', or a comma-separated list of
 * package ids). Ids that are not keys of getPackageMap() are skipped.
 * Each file is saved as <indir><id>.gz through Utils::DownloadSingle().
 */
function download(){
	$ldir = parent::getParameterValue('indir');
	$rdir = parent::getParameterValue('download_url');

	//which files are to be downloaded?
	$selected = trim((string) parent::getParameterValue('files'));
	$pm = $this->getPackageMap();
	if($selected == 'all') {
		$files = $pm;
	} else {
		// Split the user-supplied list itself. The previous code exploded an
		// undefined $selectedPackage variable, so an explicit selection
		// always resulted in an empty file list.
		$files = array();
		foreach(explode(",", $selected) as $a){
			$a = trim($a); // tolerate "a, b" style lists
			if(array_key_exists($a, $pm)){
				$files[$a] = $pm[$a];
			}
		}
	}

	//now iterate over the files array
	foreach ($files as $id => $file){
		echo "Processing $id ... ";

		$lfile = $ldir.$id.".gz";

		// download
		//don't use subdirectory GENE_INFO for saving local version of All_data.gene_info.gz
		// NOTE(review): only these two ids are fetched through the
		// compress.zlib:// stream wrapper; the reason is not visible here - confirm.
		if($id == "gene2sts" || $id == "gene2unigene") {
			$rfile = "compress.zlib://".$rdir.$file;
		} else {
			$rfile = $rdir.$file;
		}
		Utils::DownloadSingle($rfile, $lfile);
	}

}
107+
108+
function process(){
109+
110+
$ldir = parent::getParameterValue('indir');
111+
$odir = parent::getParameterValue('outdir');
112+
$rdir = parent::getParameterValue('download_url');
113+
74114
//which files are to be converted?
75-
$selectedPackage = trim($this->GetParameterValue('files'));
76-
if($selectedPackage == 'all') {
115+
$files = trim($this->GetParameterValue('files'));
116+
if($files == 'all') {
77117
$files = $this->getPackageMap();
78118
} else {
79119
$sel_arr = explode(",",$selectedPackage);
@@ -85,14 +125,22 @@ function Run(){
85125
}
86126
}
87127
}
128+
129+
//set dataset graph to be dataset URI
130+
$graph_uri = parent::getGraphURI();
131+
if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());
132+
133+
$dataset_description = '';
134+
88135
//now iterate over the files array
89136
foreach ($files as $id => $file){
90137
echo "Processing $id ... ";
91138

92139
$lfile = $ldir.$id.".gz";
93140

94141
// download
95-
if(!file_exists($lfile) || parent::getParameterValue('download') == true) {
142+
if(!file_exists($lfile)) {
143+
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
96144
//don't use subdirectory GENE_INFO for saving local version of All_data.gene_info.gz
97145
if($id == "gene2sts" || $id == "gene2unigene") {
98146
$rfile = "compress.zlib://".$rdir.$file;
@@ -102,7 +150,7 @@ function Run(){
102150
Utils::DownloadSingle($rfile, $lfile);
103151
}
104152

105-
$ofile = $odir.$id.".nt";
153+
$ofile = $id.".nt";
106154
$gz = false;
107155
if(parent::getParameterValue('graph_uri')) {
108156
$ofile .= ".nq";
@@ -113,34 +161,63 @@ function Run(){
113161
}
114162

115163
parent::setReadFile($lfile, true);
116-
parent::setWriteFile($ofile, $gz);
164+
parent::setWriteFile($odir.$ofile, $gz);
117165
$fnx = $id;
118-
echo ' parsing ...';
166+
echo ' Processing $id ...'.PHP_EOL;
119167
$this->$fnx();
120168
echo 'done!'.PHP_EOL;
121169
parent::getReadFile()->Close();
122170
parent::getWriteFile()->Close();
171+
172+
// generate the dataset release file
173+
echo "Generating dataset description... ";
174+
// dataset description
175+
$source_file = (new DataResource($this))
176+
->setURI($rfile)
177+
->setTitle("NCBI Gene ($id)")
178+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($lfile)))
179+
->setFormat("text/tab-separated-value")
180+
->setFormat("application/gzip")
181+
->setPublisher("http://www.ncbi.nlm.nih.gov")
182+
->setHomepage("http://www.ncbi.nlm.nih.gov/gene")
183+
->setRights("use-share-modify")
184+
->setLicense("http://www.ncbi.nlm.nih.gov/About/disclaimer.html")
185+
->setDataset("http://identifiers.org/ncbigene/");
186+
187+
$prefix = parent::getPrefix();
188+
$bVersion = parent::getParameterValue('bio2rdf_release');
189+
$date = date ("Y-m-d\TG:i:s\Z");
190+
$output_file = (new DataResource($this))
191+
->setURI("http://download.bio2df.org/release/$bVersion/$prefix/$ofile")
192+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
193+
->setSource($source_file->getURI())
194+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/gene/entrez_gene.php")
195+
->setCreateDate($date)
196+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
197+
->setPublisher("http://bio2rdf.org")
198+
->setRights("use-share-modify")
199+
->setRights("by-attribution")
200+
->setRights("restricted-by-source-license")
201+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
202+
->setDataset(parent::getDatasetURI());
203+
204+
if($gz) $output_file->setFormat("application/gzip");
205+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
206+
else $output_file->setFormat("application/n-quads");
207+
208+
$dataset_description .= $source_file->toRDF().$output_file->toRDF();
123209

124210
}//foreach
125211

126-
// generate the dataset release file
127-
echo "generating dataset release file... ";
128-
$desc = parent::getBio2RDFDatasetDescription(
129-
$this->getPrefix(),
130-
"https://github.com/bio2rdf/bio2rdf-scripts/blob/master/gene/entrez_gene.php",
131-
$this->getBio2RDFDownloadURL($this->getNamespace()),
132-
"http://yeastgenome.org",
133-
array("use-share-modify"),
134-
"http://www.ncbi.nlm.nih.gov/About/disclaimer.html",
135-
parent::getParameterValue('download_url'),
136-
parent::getDatasetVersion()
137-
);
138-
$this->setWriteFile($odir.$this->getBio2RDFReleaseFile($this->getNamespace()));
139-
$this->getWriteFile()->write($desc);
140-
$this->getWriteFile()->close();
212+
//set graph URI back to default value
213+
parent::setGraphURI($graph_uri);
214+
//write dataset description to file
215+
parent::setWriteFile($odir.parent::getBio2RDFReleaseFile());
216+
parent::getWriteFile()->write($dataset_description);
217+
parent::getWriteFile()->close();
141218
echo "done!".PHP_EOL;
142219

143-
}//run
220+
}
144221

145222
#see: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/README
146223
private function gene2vega(){
@@ -673,16 +750,4 @@ public function getPackageMap(){
673750
return self::$packageMap;
674751
}
675752
}
676-
677-
$start = microtime(true);
678-
679-
set_error_handler('error_handler');
680-
$parser = new EntrezGeneParser($argv);
681-
$parser-> Run();
682-
683-
$end = microtime(true);
684-
$time_taken = $end - $start;
685-
print "Started: ".date("l jS F \@ g:i:s a", $start)."\n";
686-
print "Finished: ".date("l jS F \@ g:i:s a", $end)."\n";
687-
print "Took: ".$time_taken." seconds\n"
688753
?>

0 commit comments

Comments
 (0)