Skip to content

Commit 3ced855

Browse files
update to wormbase
1 parent a2345e0 commit 3ced855

File tree

1 file changed

+24
-22
lines changed

1 file changed

+24
-22
lines changed

wormbase/wormbase.php

+24 -22
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ class WormbaseParser extends Bio2RDFizer {
3535
function __construct($argv) {
3636
parent::__construct($argv, "wormbase");
3737
parent::addParameter('files', true, 'all|geneIDs|functional_descriptions|gene_associations|gene_interactions|phenotype_associations','all','files to process');
38-
parent::addParameter('release', false, null, 'WS243', 'Release version of WormBase');
38+
parent::addParameter('release', false, null, 'current', 'Release version of WormBase');
3939
parent::addParameter('download_url', false, null,'ftp://ftp.wormbase.org/pub/wormbase/');
4040
parent::initialize();
4141
}//constructor
@@ -49,20 +49,21 @@ public function run()
4949
$files = explode(",",parent::getParameterValue('files'));
5050
}
5151
$release = parent::getParameterValue('release');
52+
$releaseb = "WS247";
5253
$remote_files = array(
53-
"geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758.".parent::getParameterValue('release').".geneIDs.txt.gz",
54-
"functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758.".parent::getParameterValue('release').".functional_descriptions.txt.gz",
55-
"gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758.".parent::getParameterValue('release').".gene_interactions.txt.gz",
56-
"gene_associations" => "releases/".$release."/ONTOLOGY/gene_association.".parent::getParameterValue('release').".wb",
57-
"phenotype_associations" => "releases/".$release."/ONTOLOGY/phenotype_association.".parent::getParameterValue('release').".wb"
54+
"geneIDs" => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758.".$release.".geneIDs.txt.gz",
55+
"functional_descriptions" => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758.".$release.".functional_descriptions.txt.gz",
56+
"gene_interactions" => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758.".$release.".gene_interactions.txt.gz",
57+
"gene_associations" => "releases/current-production-release/ONTOLOGY/gene_association.".$releaseb.".wb",
58+
"phenotype_associations" => "releases/current-production-release/ONTOLOGY/phenotype_association.".$releaseb.".wb"
5859
);
5960

6061
$local_files = array(
6162
"geneIDs" => "wormbase.".parent::getParameterValue('release').".genes.txt.gz",
6263
"functional_descriptions" => "wormbase.".parent::getParameterValue('release').".functional_descriptions.txt.gz",
6364
"gene_interactions" => "wormbase.".parent::getParameterValue('release').".gene_interactions.txt.gz",
6465
"gene_associations" => "wormbase.".parent::getParameterValue('release').".gene_association.wb",
65-
"phenotype_associations" => "wormbase.".parent::getParameterValue('release')."phenotype_associations.wb"
66+
"phenotype_associations" => "wormbase.".parent::getParameterValue('release').".phenotype_associations.wb"
6667
);
6768

6869
$idir = parent::getParameterValue('indir');
@@ -84,7 +85,6 @@ public function run()
8485
Utils::DownloadSingle($rfile, $lfile);
8586
echo "done!".PHP_EOL;
8687
}
87-
8888
if(strstr($lfile, "gz")){
8989
parent::setReadFile($lfile, TRUE);
9090
} else {
@@ -190,10 +190,11 @@ function functional_descriptions()
190190
{
191191
while($l = $this->getReadFile()->read(2000000)){
192192
if($l[0] == "#") continue;
193-
// gene_id public_name molecular_name concise_description provisional_description detailed_description gene_class_description
193+
if(strstr($l,"gene_id")) continue;
194194

195-
$a = explode("\t",rtrim($l));
196-
if(count($a) != 7) {trigger_error("Found one row that only has ".count($a)." columns, expecting 7");continue;}
195+
// gene_id public_name molecular_name concise_description provisional_description detailed_description automated_description gene_class_description
196+
$a = explode("\t",$l);
197+
if(count($a) != 8) {trigger_error("Found one row that only has ".count($a)." columns, expecting 8",E_USER_ERROR);continue;}
197198

198199
$id = parent::getNamespace().$a[0];
199200
$label = $a[1].($a[2]?" (".$a[2].")":"");
@@ -204,7 +205,8 @@ function functional_descriptions()
204205
parent::triplifyString($id, parent::getVoc()."concise-description", $a[3]).
205206
parent::triplifyString($id, parent::getVoc()."provisional-description", $a[4]).
206207
parent::triplifyString($id, parent::getVoc()."detailed-description", $a[5]).
207-
parent::triplifyString($id, parent::getVoc()."gene-class-description", $a[6])
208+
parent::triplifyString($id, parent::getVoc()."automated-description", $a[6]).
209+
parent::triplifyString($id, parent::getVoc()."gene-class-description", trim($a[7]))
208210
);
209211
parent::writeRDFBufferToWriteFile();
210212
}
@@ -258,16 +260,17 @@ function gene_associations(){
258260
$split_paper = explode(":", $paper);
259261
if($split_paper[0] == "PMID"){
260262
$paper_id = "pubmed:".$split_paper[1];
263+
parent::addRDF(
264+
parent::triplify($association_id, parent::getVoc()."x-pubmed", $paper_id)
265+
);
261266
} elseif($split_paper[0] == "WB_REF"){
262267
$paper_id = parent::getNamespace().$split_paper[1];
263268
$paper_label = "Wormbase paper ".$split_paper[1];
264269
parent::addRDF(
265-
parent::describeIndividual($paper_id, $paper_label, parent::getVoc()."Publication")
270+
parent::describeIndividual($paper_id, $paper_label, parent::getVoc()."Publication").
271+
parent::triplify($association_id, parent::getVoc()."publication", $paper_id)
266272
);
267273
}
268-
parent::addRDF(
269-
parent::triplify($association_id, parent::getVoc()."publication", $paper_id)
270-
);
271274
}//foreach
272275
parent::WriteRDFBufferToWriteFile();
273276
}//while
@@ -308,18 +311,15 @@ function phenotype_associations()
308311

309312
if(strstr($data[7], "WBVar")){
310313
foreach($variant AS $v) {
311-
$v = str_replace("|","",$v);
312-
313314
if(trim($v) == '') continue;
314315
parent::addRDF(
315-
parent::describeIndividual(parent::getNamespace().$v, "Variant of ".$gene, parent::getVoc()."Gene-Variant").
316+
parent::describeIndividual($v, "Variant of ".$gene, parent::getVoc()."Gene-Variant").
316317
parent::describeClass(parent::getVoc()."Gene-Variant","Gene Variant").
317-
parent::triplify($pa_id, parent::getVoc()."associated-gene-variant", parent::getNamespace().$v)
318+
parent::triplify($pa_id, parent::getVoc()."associated-gene-variant", $v)
318319
);
319320
}
320321
} elseif(strstr($data[7], "WBRNAi")){
321322
foreach($variant AS $v) {
322-
$v = str_replace("|","",$v);
323323
$var_rnai_id = $v;
324324
$var_rnai_label = "RNAi ".$v;
325325
$rnai_exp_id = parent::getRes().($z++);
@@ -333,7 +333,9 @@ function phenotype_associations()
333333
parent::triplify($pa_id, parent::getVoc()."associated-rnai-knockdown-experiment", $rnai_exp_id)
334334
);
335335
}
336-
}
336+
} else {
337+
// var_dump($variant);
338+
}
337339

338340
if($neg) {
339341
parent::addRDF(

0 commit comments

Comments
 (0)