Skip to content

Commit 275763c

Browse files
Merge pull request #463 from micheldumontier/release3
fixes for various parsers
2 parents 2bd40ca + c31f77a commit 275763c

File tree

6 files changed

+41
-19
lines changed

6 files changed

+41
-19
lines changed

drugbank/drugbank.php

+6 -5
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@ class DrugBankParser extends Bio2RDFizer
3636
function __construct($argv) {
3737
parent::__construct($argv,"drugbank");
3838
parent::addParameter('files', true, 'all|drugbank','all','Files to convert');
39-
parent::addParameter('download_url',false,null,'http://www.drugbank.ca/system/downloads/current/');
39+
parent::addParameter('download_url',false,null,'https://www.drugbank.ca/releases/5-0-5/downloads/all-full-database');
4040
parent::initialize();
4141
}
4242

@@ -70,8 +70,9 @@ function Run()
7070
$dataset_description = '';
7171
foreach($files AS $f) {
7272
if($f == 'drugbank') {
73-
$file = 'drugbank.xml.zip';
73+
$file = 'drugbank_all_full_database.xml.zip';
7474
$lname = 'drugbank';
75+
$insidezip_file = "full database.xml";
7576
}
7677
$fnx = 'parse_'.$f;
7778

@@ -91,7 +92,7 @@ function Run()
9192
if(file_exists($indir.$file)) {
9293
// call the parser
9394
echo "processing $file ...".PHP_EOL;
94-
$this->$fnx($indir,$file);
95+
$this->$fnx($indir,$file, $insidezip_file);
9596
echo "done".PHP_EOL;
9697
parent::clear();
9798
}
@@ -146,9 +147,9 @@ function Run()
146147
}
147148

148149

149-
function parse_drugbank($ldir,$infile)
150+
function parse_drugbank($ldir,$infile, $insidezip_file)
150151
{
151-
$xml = new CXML($ldir.$infile);
152+
$xml = new CXML($ldir.$infile, $insidezip_file);
152153
while($xml->parse("drug") == TRUE) {
153154
if(isset($this->id_list) and count($this->id_list) == 0) break;
154155
$this->parseDrugEntry($xml);

goa/goa.php

+21 -9
Original file line number | Diff line number | Diff line change
@@ -57,22 +57,22 @@ function run(){
5757

5858
foreach($files as $file){
5959
$download = parent::getParameterValue('download');
60-
$lfile = $ldir."goa_".$file.".gz";
60+
$basefile = "goa_".$file.".gaf";
61+
$lfile = $ldir."/".$basefile.".gz";
6162
if(!file_exists($lfile) && $download == false) {
6263
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
6364
$download = true;
6465
}
6566

6667
//download file
67-
$rfile = $rdir.strtoupper($file)."/gene_association.goa_".$file.".gz";
68+
$rfile = $rdir.strtoupper($file)."/".$basefile.".gz";
6869
if($download == true) {
69-
echo "downloading $file ... ";
70-
//file_put_contents($lfile,file_get_contents($rfile));
70+
echo "downloading $file".".gz ... ";
7171
utils::DownloadSingle($rfile,$lfile);
7272
}
7373

7474
$gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true;
75-
$ofile = "goa_".$file.".".parent::getParameterValue('output_format');
75+
$ofile = $basefile.".".parent::getParameterValue('output_format');
7676

7777
parent::setReadFile($lfile, TRUE);
7878
parent::setWriteFile($odir.$ofile, $gz);
@@ -153,6 +153,9 @@ function process($file){
153153
$goid = substr($fields[4],3);
154154
$refs = $this->getDbReferences($fields[5]);
155155
$eco = $this->getEvidenceCodeLabelArr($fields[6]);
156+
if($eco === null) {
157+
print_r($fields[6]);exit;
158+
}
156159
$aspect = $this->getAspect($fields[8]);
157160
$label = $fields[9];
158161
$synonyms = explode("|", $fields[10]);
@@ -291,6 +294,15 @@ function getEvidenceCodeLabelArr($aec){
291294
"IGI"=> array("Inferred from Genetic Interaction","0000316"),
292295
"IEP"=> array("Inferred from Expression Pattern", "0000008")
293296
);
297+
298+
$htp = array(
299+
"HTP" => array("Inferred from High Throughput Experiment",""),
300+
"HDA" => array("Inferred from High Throughput Direct Assay",""),
301+
"HMP" => array("Inferred from Hight Throughput Mutant Phenotype",""),
302+
"HGI" => array("Inferred from High Throughput Genetic Interaction",""),
303+
"HEP" => array("Inferred from High Throughput Expression Pattern","")
304+
);
305+
294306
//computational analysis codes
295307
$cac = array(
296308
"ISS"=> array("Inferred from Sequence or Structural Similarity","0000027"),
@@ -319,9 +331,11 @@ function getEvidenceCodeLabelArr($aec){
319331
$aac = array(
320332
"IEA"=>array("Inferred from Electronic Annotation", "0000203")
321333
);
322-
334+
323335
if(array_key_exists($aec, $ec)){
324336
return array("experimental evidence code"=>$ec[$aec]);
337+
}elseif(array_key_exists($aec, $htp)){
338+
return array("high throughput code"=>$htp[$aec]);
325339
}elseif(array_key_exists($aec, $cac)){
326340
return array("computational analysis code"=>$cac[$aec]);
327341
}elseif(array_key_exists($aec, $asc)){
@@ -330,10 +344,8 @@ function getEvidenceCodeLabelArr($aec){
330344
return array("curator statement code"=>$csc[$aec]);
331345
}elseif(array_key_exists($aec, $aac)){
332346
return array("automatically assigned code"=>$aac[$aec]);
333-
}elseif(array_key_exists($aec, $oec)){
334-
return array("obsolete evidence code"=>$oec[$aec]);
335347
}else{
336-
return null;
348+
return array("unmapped evidence code"=> $aec);
337349
}
338350
} else {
339351
return null;

hgnc/hgnc.php

+2
Original file line number | Diff line number | Diff line change
@@ -117,10 +117,12 @@ function Run(){
117117
function process(){
118118
$header = $this->getReadFile()->read(200000);
119119
$header_arr = explode("\t", $header);
120+
120121
$h = array_flip($header_arr);
121122

122123
$c = count($h);
123124
$n = 52;
125+
124126
if ($c != $n)
125127
{
126128
echo PHP_EOL;

kegg/kegg.php

+8 -3
Original file line number | Diff line number | Diff line change
@@ -675,6 +675,7 @@ function parseEntry($lfile)
675675
}
676676
if(in_array($k, array("INTERACTION","METABOLISM","TARGET"))) {
677677
// dopamine D2-receptor antagonist [HSA:1813] [KO:K04145]
678+
// K04348 K06268 K17610 K17611
678679
$id = parent::getRes().md5($uri.$v);
679680
$type = ucfirst(strtolower($k));
680681
if(in_array($k, array("INTERACTION","METABOLISM"))) {
@@ -697,10 +698,14 @@ function parseEntry($lfile)
697698
if(isset($m[1]) and !empty($m[1])) {
698699
foreach($m[1] AS $item) {
699700
$a = explode(':',$item); // get the namespace
701+
if(!isset($a[1])) {continue;} // skip this.
700702
$b = explode(' ',$a[1]);
701703
foreach($b AS $c) {
702-
if(!strstr($item,"KO")) $i = "kegg:".$a[0].'_'.$c;
703-
else $i = "kegg:".$c;
704+
if(!strstr($item,"KO")) {
705+
$i = "kegg:".$a[0].'_'.$c;
706+
} else {
707+
$i = "kegg:".$c;
708+
}
704709
parent::addRDF(
705710
parent::triplify($id,parent::getVoc()."link",$i)
706711
);
@@ -845,4 +850,4 @@ function parseKGML($lfile)
845850
}
846851

847852

848-
853+

miriam/miriam.php

+3 -1
Original file line number | Diff line number | Diff line change
@@ -175,14 +175,16 @@ function parseItem($item)
175175
$rid = parent::getRes().str_replace(":","",$id)."_".($i+1);
176176
$a = $myitem['@attributes'];
177177
$rid_type = parent::getVoc().'restriction_type_'.$a['type'];
178+
$page = '';
179+
if(isset($myitem['link']) and strstr($myitem['link'],"http") !== FALSE) $page = $myitem['link'];
178180

179181
parent::addRDF(
180182
parent::describeIndividual($rid, $a['desc'], parent::getVoc()."Restriction").
181183
parent::describeClass(parent::getVoc()."Restriction", "Resource Restriction").
182184
parent::triplify($rid, "rdf:type", $rid_type).
183185
parent::describeClass($rid_type, $a['desc'], parent::getVoc()."Restriction").
184186
parent::triplifyString($rid, "dct:description", $myitem['statement']).
185-
parent::triplify($rid, "foaf:page", isset($myitem['link'])?$myitem['link']:"").
187+
parent::triplify($rid, "foaf:page", $page).
186188
parent::triplify($id, parent::getVoc()."restriction", $rid)
187189
);
188190
}}

refseq/refseq.php

+1 -1
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ function process(){
212212
$p = "/(\S+)\=(.*)/";
213213
preg_match($p, $aL, $m);
214214
if(count($m)){
215-
if($m[1] == "db_xref"){
215+
if($m[1] == "db_xref" and strstr($m[2],"http") !== FALSE){
216216
parent::AddRDF(
217217
parent::triplify($feat_res, "rdfs:seeAlso", str_replace("\"", "", $m[2]))
218218
);

0 commit comments

Comments
 (0)