@@ -35,7 +35,7 @@ class WormbaseParser extends Bio2RDFizer {
35
35
function __construct ($ argv ) {
36
36
parent ::__construct ($ argv , "wormbase " );
37
37
parent ::addParameter ('files ' , true , 'all|geneIDs|functional_descriptions|gene_associations|gene_interactions|phenotype_associations ' ,'all ' ,'files to process ' );
38
- parent ::addParameter ('release ' , false , null , 'WS243 ' , 'Release version of WormBase ' );
38
+ parent ::addParameter ('release ' , false , null , 'current ' , 'Release version of WormBase ' );
39
39
parent ::addParameter ('download_url ' , false , null ,'ftp://ftp.wormbase.org/pub/wormbase/ ' );
40
40
parent ::initialize ();
41
41
}//constructor
@@ -49,20 +49,21 @@ public function run()
49
49
$ files = explode (", " ,parent ::getParameterValue ('files ' ));
50
50
}
51
51
$ release = parent ::getParameterValue ('release ' );
52
+ $ releaseb = "WS247 " ;
52
53
$ remote_files = array (
53
- "geneIDs " => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758. " .parent :: getParameterValue ( ' release ' ) .".geneIDs.txt.gz " ,
54
- "functional_descriptions " => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758. " .parent :: getParameterValue ( ' release ' ) .".functional_descriptions.txt.gz " ,
55
- "gene_interactions " => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758. " .parent :: getParameterValue ( ' release ' ) .".gene_interactions.txt.gz " ,
56
- "gene_associations " => "releases/ " . $ release. " /ONTOLOGY/gene_association. " .parent :: getParameterValue ( ' release ' ) .".wb " ,
57
- "phenotype_associations " => "releases/ " . $ release. " /ONTOLOGY/phenotype_association. " .parent :: getParameterValue ( ' release ' ) .".wb "
54
+ "geneIDs " => "species/c_elegans/annotation/geneIDs/c_elegans.PRJNA13758. " .$ release .".geneIDs.txt.gz " ,
55
+ "functional_descriptions " => "species/c_elegans/annotation/functional_descriptions/c_elegans.PRJNA13758. " .$ release .".functional_descriptions.txt.gz " ,
56
+ "gene_interactions " => "species/c_elegans/annotation/gene_interactions/c_elegans.PRJNA13758. " .$ release .".gene_interactions.txt.gz " ,
57
+ "gene_associations " => "releases/current-production- release/ONTOLOGY/gene_association. " .$ releaseb .".wb " ,
58
+ "phenotype_associations " => "releases/current-production- release/ONTOLOGY/phenotype_association. " .$ releaseb .".wb "
58
59
);
59
60
60
61
$ local_files = array (
61
62
"geneIDs " => "wormbase. " .parent ::getParameterValue ('release ' ).".genes.txt.gz " ,
62
63
"functional_descriptions " => "wormbase. " .parent ::getParameterValue ('release ' ).".functional_descriptions.txt.gz " ,
63
64
"gene_interactions " => "wormbase. " .parent ::getParameterValue ('release ' ).".gene_interactions.txt.gz " ,
64
65
"gene_associations " => "wormbase. " .parent ::getParameterValue ('release ' ).".gene_association.wb " ,
65
- "phenotype_associations " => "wormbase. " .parent ::getParameterValue ('release ' )."phenotype_associations.wb "
66
+ "phenotype_associations " => "wormbase. " .parent ::getParameterValue ('release ' ).". phenotype_associations.wb "
66
67
);
67
68
68
69
$ idir = parent ::getParameterValue ('indir ' );
@@ -84,7 +85,6 @@ public function run()
84
85
Utils::DownloadSingle ($ rfile , $ lfile );
85
86
echo "done! " .PHP_EOL ;
86
87
}
87
-
88
88
if (strstr ($ lfile , "gz " )){
89
89
parent ::setReadFile ($ lfile , TRUE );
90
90
} else {
@@ -190,10 +190,11 @@ function functional_descriptions()
190
190
{
191
191
while ($ l = $ this ->getReadFile ()->read (2000000 )){
192
192
if ($ l [0 ] == "# " ) continue ;
193
- // gene_id public_name molecular_name concise_description provisional_description detailed_description gene_class_description
193
+ if ( strstr ( $ l , " gene_id " )) continue ;
194
194
195
- $ a = explode ("\t" ,rtrim ($ l ));
196
- if (count ($ a ) != 7 ) {trigger_error ("Found one row that only has " .count ($ a )." columns, expecting 7 " );continue ;}
195
+ // gene_id public_name molecular_name concise_description provisional_description detailed_description automated_description gene_class_description
196
+ $ a = explode ("\t" ,$ l );
197
+ if (count ($ a ) != 8 ) {trigger_error ("Found one row that only has " .count ($ a )." columns, expecting 8 " ,E_USER_ERROR );continue ;}
197
198
198
199
$ id = parent ::getNamespace ().$ a [0 ];
199
200
$ label = $ a [1 ].($ a [2 ]?" ( " .$ a [2 ].") " :"" );
@@ -204,7 +205,8 @@ function functional_descriptions()
204
205
parent ::triplifyString ($ id , parent ::getVoc ()."concise-description " , $ a [3 ]).
205
206
parent ::triplifyString ($ id , parent ::getVoc ()."provisional-description " , $ a [4 ]).
206
207
parent ::triplifyString ($ id , parent ::getVoc ()."detailed-description " , $ a [5 ]).
207
- parent ::triplifyString ($ id , parent ::getVoc ()."gene-class-description " , $ a [6 ])
208
+ parent ::triplifyString ($ id , parent ::getVoc ()."automated-description " , $ a [6 ]).
209
+ parent ::triplifyString ($ id , parent ::getVoc ()."gene-class-description " , trim ($ a [7 ]))
208
210
);
209
211
parent ::writeRDFBufferToWriteFile ();
210
212
}
@@ -258,16 +260,17 @@ function gene_associations(){
258
260
$ split_paper = explode (": " , $ paper );
259
261
if ($ split_paper [0 ] == "PMID " ){
260
262
$ paper_id = "pubmed: " .$ split_paper [1 ];
263
+ parent ::addRDF (
264
+ parent ::triplify ($ association_id , parent ::getVoc ()."x-pubmed " , $ paper_id )
265
+ );
261
266
} elseif ($ split_paper [0 ] == "WB_REF " ){
262
267
$ paper_id = parent ::getNamespace ().$ split_paper [1 ];
263
268
$ paper_label = "Wormbase paper " .$ split_paper [1 ];
264
269
parent ::addRDF (
265
- parent ::describeIndividual ($ paper_id , $ paper_label , parent ::getVoc ()."Publication " )
270
+ parent ::describeIndividual ($ paper_id , $ paper_label , parent ::getVoc ()."Publication " ).
271
+ parent ::triplify ($ association_id , parent ::getVoc ()."publication " , $ paper_id )
266
272
);
267
273
}
268
- parent ::addRDF (
269
- parent ::triplify ($ association_id , parent ::getVoc ()."publication " , $ paper_id )
270
- );
271
274
}//foreach
272
275
parent ::WriteRDFBufferToWriteFile ();
273
276
}//while
@@ -308,18 +311,15 @@ function phenotype_associations()
308
311
309
312
if (strstr ($ data [7 ], "WBVar " )){
310
313
foreach ($ variant AS $ v ) {
311
- $ v = str_replace ("| " ,"" ,$ v );
312
-
313
314
if (trim ($ v ) == '' ) continue ;
314
315
parent ::addRDF (
315
- parent ::describeIndividual (parent :: getNamespace (). $ v , "Variant of " .$ gene , parent ::getVoc ()."Gene-Variant " ).
316
+ parent ::describeIndividual ($ v , "Variant of " .$ gene , parent ::getVoc ()."Gene-Variant " ).
316
317
parent ::describeClass (parent ::getVoc ()."Gene-Variant " ,"Gene Variant " ).
317
- parent ::triplify ($ pa_id , parent ::getVoc ()."associated-gene-variant " , parent :: getNamespace (). $ v )
318
+ parent ::triplify ($ pa_id , parent ::getVoc ()."associated-gene-variant " , $ v )
318
319
);
319
320
}
320
321
} elseif (strstr ($ data [7 ], "WBRNAi " )){
321
322
foreach ($ variant AS $ v ) {
322
- $ v = str_replace ("| " ,"" ,$ v );
323
323
$ var_rnai_id = $ v ;
324
324
$ var_rnai_label = "RNAi " .$ v ;
325
325
$ rnai_exp_id = parent ::getRes ().($ z ++);
@@ -333,7 +333,9 @@ function phenotype_associations()
333
333
parent ::triplify ($ pa_id , parent ::getVoc ()."associated-rnai-knockdown-experiment " , $ rnai_exp_id )
334
334
);
335
335
}
336
- }
336
+ } else {
337
+ // var_dump($variant);
338
+ }
337
339
338
340
if ($ neg ) {
339
341
parent ::addRDF (
0 commit comments