Skip to content

Commit 477a349

Browse files
Update pharmgkb.php
parsing fixes
1 parent b905f41 commit 477a349

File tree

1 file changed

+32
-23
lines changed

1 file changed

+32
-23
lines changed

Diff for: pharmgkb/pharmgkb.php

+32 -23
Original file line number | Diff line number | Diff line change
@@ -370,11 +370,17 @@ function genes()
370370
}
371371
}
372372

373-
function parseList($str)
373+
function parseList($str, $delim = ';')
374374
{
375375
$list = '';
376-
if($str[0] == '"') $list = explode('","', substr($str,1,-1));
377-
else $list = array($str);
376+
if($str[0] == '"') {
377+
$list = explode('","', substr($str,1,-1));
378+
} else {
379+
if(strstr($str,$delim)) {
380+
$list = explode($delim, $str);
381+
}
382+
}
383+
if(!is_array($list)) $list = array($str);
378384
return $list;
379385
}
380386

@@ -454,7 +460,7 @@ function drugs()
454460
if(trim($a[2])) {
455461
// generic names
456462
// Entacapona [INN-Spanish],Entacapone [Usan:Inn],Entacaponum [INN-Latin],entacapone
457-
$list = $this->parseList(trim($a[2]));
463+
$list = $this->parseList(trim($a[2]),",");
458464
foreach($list AS $c) {
459465
parent::addRDF(
460466
parent::triplifyString($id, parent::getVoc()."generic_name", str_replace('"','',$c))
@@ -467,7 +473,7 @@ function drugs()
467473
if(trim($a[3])) {
468474
// trade names
469475
//Disorat,OptiPranolol,Trimepranol
470-
$list = $this->parseList(trim($a[3]));
476+
$list = $this->parseList(trim($a[3]),",");
471477
foreach($list as $c) {
472478
parent::addRDF(
473479
parent::triplifyString($id, parent::getVoc()."trade_name", str_replace(array("'", "\""), array("\\\'", "") ,$c))
@@ -480,7 +486,7 @@ function drugs()
480486
if(trim($a[4])) {
481487
// Brand Mixtures
482488
// Benzyl benzoate 99+ %,"Dermadex Crm (Benzoic Acid + Benzyl Benzoate + Lindane + Salicylic Acid + Zinc Oxide + Zinc Undecylenate)",
483-
$list = $this->parseList(trim($a[4]));
489+
$list = $this->parseList(trim($a[4]),",");
484490
foreach($list as $c) {
485491
parent::addRDF(
486492
parent::triplifyString($id, parent::getVoc()."brand_mixture", str_replace(array("'", "\""),array("\\\'",""), $c))
@@ -500,7 +506,7 @@ function drugs()
500506
if(trim($a[6])) {
501507
// Cross References
502508
// drugBank:DB00789,keggDrug:D01707,pubChemCompound:55466,pubChemSubstance:192903,url:http://en.wikipedia.org/wiki/Gadopentetate_dimeglumine
503-
$list = $this->parseList(trim($a[6]));
509+
$list = $this->parseList(trim($a[6]),",");
504510
foreach($list as $c) {
505511
$this->getRegistry()->parseQName($c,$ns,$id1);
506512
if($ns == "chebi") $id1 = substr($id1, 6);
@@ -544,7 +550,10 @@ function drugs()
544550
// External Vocabulary
545551
// ATC:H01AC(Somatropin and somatropin agonists),ATC:V04CD(Tests for pituitary function)
546552
// ATC:D07AB(Corticosteroids, moderately potent (group II)) => this is why you don't use brackets and commas as separators.
547-
$list = $this->parseList(trim($a[10]));
553+
$list = $this->parseList(trim($a[10]),",");
554+
if(strstr($a[10],"potent")) { $c = array(implode(",",$list));$list = $c;}
555+
else if(strstr($a[10],"weak")) { $c = array(implode(",",$list));$list = $c;}
556+
548557
foreach($list as $c) {
549558
preg_match("/([^\(]+)?\((.*)\)/", $c, $m);
550559
if(isset($m[1])) {
@@ -566,7 +575,7 @@ function drugs()
566575
}
567576
if(trim($a[22])) {
568577
// ATC identifiers
569-
$list = $this->parseList(trim($a[22]));
578+
$list = $this->parseList(trim($a[22]),",");
570579
foreach($list as $c) {
571580
parent::addRDF(
572581
parent::triplify($id, parent::getVoc()."x-atc", "atc:".$c)
@@ -868,17 +877,15 @@ function clinical_ann_metadata()
868877
// [5] => Genotype-Phenotypes IDs
869878
// [6] => Text
870879
if($a[5]) {
871-
$gps = explode('","',$a[5]);
872-
$gps_texts = explode('","',$a[6]);
880+
$gps = explode(';',$a[5]);
881+
$gps_texts = explode('; ',$a[6]);
873882
foreach($gps AS $i => $gp) {
874-
$gp = str_replace('"','',trim($gp));
875-
$gp_text = str_replace('"','',trim($gps_texts[$i]));
876-
$b = explode(":",$gp_text,2);
883+
$gp_text = str_replace('\\','',$gps_texts[$i]);
877884

878885
parent::addRDF(
879886
parent::describeIndividual(parent::getNamespace().$gp, $gp_text, parent::getVoc()."Genotype-Phenotype-Association").
880887
parent::triplify($id, parent::getVoc()."genotype_phenotype", parent::getNamespace().$gp).
881-
parent::triplifyString(parent::getNamespace().$gp, parent::getVoc()."genotype", trim($b[0])).
888+
parent::triplifyString(parent::getNamespace().$gp, parent::getVoc()."genotype", trim($gp)).
882889
parent::describeClass(parent::getVoc()."Genotype-Phenotype-Association", "PharmGKB Genotype Phenotype Association").
883890
parent::describeProperty(parent::getVoc()."genotype_phenotype", "Relationship between a PharmGKB entity and a Genotype Phenotype").
884891
parent::describeProperty(parent::getVoc()."genotype", "Relationship between a PharmGKB Genotype Phenotype and a genotype")
@@ -889,11 +896,14 @@ function clinical_ann_metadata()
889896
// [7] => Variant Annotations IDs
890897
// [8] => Variant Annotations
891898
if($a[7]) {
892-
$b = explode('","',$a[7]);
893-
$b_texts = explode('","',$a[8]);
899+
$b = explode(';',$a[7]);
900+
$b_texts = explode(';',$a[8]);
901+
if(count($b) != count($b_texts)) {
902+
trigger_error("Error in parsing variant annotations");
903+
exit();
904+
}
894905
foreach($b AS $i => $variant) {
895-
$variant = str_replace('"','',trim($variant));
896-
$variant_text = str_replace('"','',trim ($b_texts[$i]));
906+
$variant_text = trim($b_texts[$i]);
897907
parent::addRDF(
898908
parent::describeIndividual(parent::getNamespace().$variant, $variant_text, parent::getVoc()."Variant-Annotation").
899909
parent::triplify($id, parent::getVoc()."variant", parent::getNamespace().$variant)
@@ -903,7 +913,7 @@ function clinical_ann_metadata()
903913

904914
// [9] => PMIDs
905915
if($a[9]) {
906-
$b = $this->parseList($a[9]);
916+
$b = explode(';', $a[9]);
907917
foreach($b AS $i => $pmid) {
908918
parent::addRDF(
909919
parent::triplify($id, parent::getVoc()."article", "pubmed:".$pmid)
@@ -921,8 +931,7 @@ function clinical_ann_metadata()
921931

922932
// [11] => Related Chemicals
923933
if($a[11]) {
924-
//print_r($a);exit;
925-
$b = $this->parseList($a[11]);
934+
$b = explode(';', $a[11]);
926935
foreach($b AS $drug_label) {
927936
preg_match('/\(PA(.*)\)/',$drug_label,$m);
928937

@@ -940,7 +949,7 @@ function clinical_ann_metadata()
940949
}
941950
// [12] => Related Diseases
942951
if($a[12]) {
943-
$b = $this->parseList($a[12]);
952+
$b = explode(';', $a[12]);
944953
foreach($b AS $disease_label) {
945954
preg_match('/\(PA(.*)\)/',$disease_label,$m);
946955
if(isset($m[1])) {

0 commit comments

Comments
 (0)