Skip to content

Commit 02a94c3

Browse files
Merge pull request #475 from micheldumontier/master
updates for v5
2 parents 690f1e7 + 7d68c17 commit 02a94c3

File tree

7 files changed

+82
-70
lines changed

7 files changed

+82
-70
lines changed

Diff for: bioportal/bioportal.php

+11 -7
Original file line number | Diff line number | Diff line change
@@ -367,7 +367,7 @@ public function TriplifyMap($a, $prefix)
367367

368368
} else {
369369
parent::addRDF(
370-
parent::triplifyString($s_uri,$p_uri,addslashes($a['o']),(($a['o_datatype'] == '')?null:$a['o_datatype']),(($a['o_lang'] == '')?null:$a['o_lang']))
370+
parent::triplifyString($s_uri,$p_uri,$a['o'],(($a['o_datatype'] == '')?null:$a['o_datatype']),(($a['o_lang'] == '')?null:$a['o_lang']))
371371
);
372372
}
373373

@@ -464,7 +464,7 @@ function OBO2RDF($abbv)
464464
$tid = $ns.":".$id;
465465
echo $tid.PHP_EOL;
466466
} else if($a[0] == "name") {
467-
$name = addslashes(stripslashes($a[1]));
467+
$name = stripslashes($a[1]);
468468
$buf .= parent::describeClass($tid,$name);
469469
$buf .= parent::triplifyString($tid,"dc:title",$name);
470470
} else if($a[0] == "is_a") {
@@ -520,7 +520,9 @@ function OBO2RDF($abbv)
520520
$b = explode(":",$a[1],2);
521521
if(isset($b[1])) {
522522
if(substr($b[1],0,4) == "http") {
523-
$buf .= parent::triplify($tid,"rdfs:seeAlso", stripslashes($b[1]));
523+
// https://en.wikipedia.org/wiki/Prolamin {source="SUBMITTER"}
524+
$url = preg_replace("/{.*\}/","",$b[1]);
525+
$buf .= parent::triplify($tid,"rdfs:seeAlso", $url);
524526
} else {
525527
$ns = str_replace(array(" ","\\",) ,"",strtolower($b[0]));
526528
$id = trim($b[1]);
@@ -547,10 +549,12 @@ function OBO2RDF($abbv)
547549
if($ns == "submitter") $ns = "chebi.submitter";
548550
if($ns == "wikipedia" || $ns == "mesh") $id = str_replace(" ","+",$id);
549551
if($ns == "id-validation-regexp") {
550-
$buf .= parent::triplifyString($tid,"obo_vocabulary:$ns", addslashes($id));
552+
$buf .= parent::triplifyString($tid,"obo_vocabulary:$ns", $id);
551553
} else {
552-
if($ns)
553-
$buf .= parent::triplify($tid,"obo_vocabulary:x-$ns", "$ns:".str_replace(" ","-",$id));
554+
if($ns) {
555+
$id = str_replace(array(" ",",","#","<",">"),array("%20","%2C","%23","%3C","%3E"),$id);
556+
$buf .= parent::triplify($tid,"obo_vocabulary:x-$ns", "$ns:$id");
557+
}
554558
}
555559
}
556560
}
@@ -688,7 +692,7 @@ function OBO2RDF($abbv)
688692
//header
689693
//format-version: 1.0
690694
$buf .= parent::triplifyString($ouri,"obo_vocabulary:$a[0]",
691-
str_replace( array('"','\:'), array('\"',':'), isset($a[1])?$a[1]:""));
695+
str_replace( array('\:'), array(':'), isset($a[1])?$a[1]:""));
692696
}
693697

694698
if($minimal || $minimalp) parent::getWriteFile()->write($min);

Diff for: drugbank/drugbank.php

+18 -8
Original file line number | Diff line number | Diff line change
@@ -185,7 +185,11 @@ function NSMap($source)
185185
case 'genbank protein database':
186186
return 'gi';
187187
case 'hugo gene nomenclature committee (hgnc)':
188-
return 'hgnc';
188+
return 'hgnc';
189+
case 'therapeutic targets database':
190+
return 'ttd';
191+
case 'rxcui':
192+
return 'rxnorm';
189193

190194
default:
191195
return strtolower($source);
@@ -455,21 +459,24 @@ function parseDrugEntry(&$xml)
455459
foreach($x->mixtures->mixture AS $item) {
456460
if(isset($item)) {
457461
$o = $item;
458-
$mid = parent::getRes().md5(str_replace(" ","-",$o->name[0]));
462+
$label = str_replace(array(" ","%"),array("-", "percent"), $o->name[0]);
463+
$ingredients = str_replace(array(" ","%"),array("-", "percent"), $o->ingredients[0]);
464+
$mid = parent::getRes().md5($label);
459465

460466
parent::addRDF(
461467
parent::triplify($did,parent::getVoc()."mixture",$mid).
462-
parent::describeIndividual($mid,$o->name[0],parent::getVoc()."Mixture").
468+
parent::describeIndividual($mid,$label,parent::getVoc()."Mixture").
463469
parent::describeClass(parent::getVoc()."Mixture","mixture").
464-
parent::triplifyString($mid,$this->getVoc()."ingredients","".$o->ingredients[0])
470+
parent::triplifyString($mid,$this->getVoc()."ingredients","".$ingredients)
465471
);
466472

467473
$a = explode(" + ",$o->ingredients[0]);
468474
foreach($a AS $b) {
469475
$b = trim($b);
470-
$iid = parent::getRes().str_replace(" ","-",$b);
476+
$label = str_replace(array(" ",",","%"),array("-","-","percent"),$b);
477+
$iid = parent::getRes().$label;
471478
parent::addRDF(
472-
parent::describeClass($iid,$b, parent::getVoc()."Ingredient").
479+
parent::describeClass($iid,$label, parent::getVoc()."Ingredient").
473480
parent::describeClass(parent::getVoc()."Ingredient","Ingredient").
474481
parent::triplify($mid,parent::getVoc()."ingredient",$iid)
475482
);
@@ -661,7 +668,7 @@ function parseDrugEntry(&$xml)
661668
$ns = $this->NSMap($obj->resource);
662669
$id = $obj->identifier;
663670
if($ns == "genecards") $id = str_replace(array(" "),array("_"),$id);
664-
671+
if($ns == "wikipedia") $id = str_replace(array(","), "-", $id);
665672
parent::addRDF(
666673
parent::triplify($did,parent::getVoc()."x-$ns","$ns:$id")
667674
);
@@ -781,7 +788,10 @@ function AddList(&$x, $id, $list_name, $item_name, $predicate, $list_item_name =
781788
if(isset($mylist)) {
782789
foreach($mylist AS $item) {
783790
$label = ''.$item;
784-
$kid = parent::getVoc().ucfirst(str_replace(" ","-",$label)); // generate a new identifier for the list item
791+
$label = str_replace(array(" ","[","]",","),"-",$label);
792+
$label = rawurlencode(utf8_encode($label));
793+
794+
$kid = parent::getVoc().ucfirst($label); // generate a new identifier for the list item
785795
$this->addRDF(
786796
$this->describeIndividual($kid,$label,parent::getVoc().ucfirst($item_name)).
787797
$this->describeClass(parent::getVoc().ucfirst($item_name),ucfirst($item_name)).

Diff for: lsr/lsr.php

+2 -2
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ function parse()
173173
if($r['pubmed']) {
174174
foreach(explode(",",$r['pubmed']) AS $pubmed) {
175175
parent::addRDF(
176-
parent::triplify($id,"cito:citesAsAuthority","pubmed:".$pubmed)
176+
parent::triplify($id,"cito:citesAsAuthority","pubmed:".trim($pubmed))
177177
// parent::triplify("pubmed:".$pubmed, "rdf:type", "pubmed_vocabulary:Resource")
178178
);
179179
}
@@ -206,7 +206,7 @@ function parse()
206206
&& $r['homepage'] !== 'dead'
207207
&& $r['homepage'] !== 'unavailable') {
208208
parent::addRDF(
209-
parent::triplify($id,"foaf:page",$r['homepage'])
209+
parent::triplify($id,"foaf:page",trim($r['homepage']))
210210
);
211211
}
212212
if($r['license']) {

Diff for: mgi/mgi.php

+2 -2
Original file line number | Diff line number | Diff line change
@@ -266,14 +266,14 @@ function MGI_GenePheno()
266266
}
267267

268268
if($a[5]) {
269-
$pmids = explode(",",$a[5]);
269+
$pmids = explode("|",$a[5]);
270270
foreach($pmids AS $pmid) {
271271
parent::addRDF(
272272
parent::triplify($id,$this->getVoc()."x-pubmed","pubmed:".$pmid)
273273
);
274274
}
275275
}
276-
$b = explode(",",$a[6]);
276+
$b = explode("|",$a[6]);
277277
foreach($b AS $marker) {
278278
parent::addRDF(
279279
parent::triplify($id,$this->getVoc()."marker",$marker).

Diff for: pharmgkb/pharmgkb.php

+35 -37
Original file line number | Diff line number | Diff line change
@@ -370,11 +370,17 @@ function genes()
370370
}
371371
}
372372

373-
function parseList($str)
373+
function parseList($str, $delim = ';')
374374
{
375375
$list = '';
376-
if($str[0] == '"') $list = explode('","', substr($str,1,-1));
377-
else $list = array($str);
376+
if($str[0] == '"') {
377+
$list = explode('","', substr($str,1,-1));
378+
} else {
379+
if(strstr($str,$delim)) {
380+
$list = explode($delim, $str);
381+
}
382+
}
383+
if(!is_array($list)) $list = array($str);
378384
return $list;
379385
}
380386

@@ -454,7 +460,7 @@ function drugs()
454460
if(trim($a[2])) {
455461
// generic names
456462
// Entacapona [INN-Spanish],Entacapone [Usan:Inn],Entacaponum [INN-Latin],entacapone
457-
$list = $this->parseList(trim($a[2]));
463+
$list = $this->parseList(trim($a[2]),",");
458464
foreach($list AS $c) {
459465
parent::addRDF(
460466
parent::triplifyString($id, parent::getVoc()."generic_name", str_replace('"','',$c))
@@ -467,7 +473,7 @@ function drugs()
467473
if(trim($a[3])) {
468474
// trade names
469475
//Disorat,OptiPranolol,Trimepranol
470-
$list = $this->parseList(trim($a[3]));
476+
$list = $this->parseList(trim($a[3]),",");
471477
foreach($list as $c) {
472478
parent::addRDF(
473479
parent::triplifyString($id, parent::getVoc()."trade_name", str_replace(array("'", "\""), array("\\\'", "") ,$c))
@@ -480,7 +486,7 @@ function drugs()
480486
if(trim($a[4])) {
481487
// Brand Mixtures
482488
// Benzyl benzoate 99+ %,"Dermadex Crm (Benzoic Acid + Benzyl Benzoate + Lindane + Salicylic Acid + Zinc Oxide + Zinc Undecylenate)",
483-
$list = $this->parseList(trim($a[4]));
489+
$list = $this->parseList(trim($a[4]),",");
484490
foreach($list as $c) {
485491
parent::addRDF(
486492
parent::triplifyString($id, parent::getVoc()."brand_mixture", str_replace(array("'", "\""),array("\\\'",""), $c))
@@ -500,7 +506,7 @@ function drugs()
500506
if(trim($a[6])) {
501507
// Cross References
502508
// drugBank:DB00789,keggDrug:D01707,pubChemCompound:55466,pubChemSubstance:192903,url:http://en.wikipedia.org/wiki/Gadopentetate_dimeglumine
503-
$list = $this->parseList(trim($a[6]));
509+
$list = $this->parseList(trim($a[6]),",");
504510
foreach($list as $c) {
505511
$this->getRegistry()->parseQName($c,$ns,$id1);
506512
if($ns == "chebi") $id1 = substr($id1, 6);
@@ -544,7 +550,10 @@ function drugs()
544550
// External Vocabulary
545551
// ATC:H01AC(Somatropin and somatropin agonists),ATC:V04CD(Tests for pituitary function)
546552
// ATC:D07AB(Corticosteroids, moderately potent (group II)) => this is why you don't use brackets and commas as separators.
547-
$list = $this->parseList(trim($a[10]));
553+
$list = $this->parseList(trim($a[10]),",");
554+
if(strstr($a[10],"potent")) { $c = array(implode(",",$list));$list = $c;}
555+
else if(strstr($a[10],"weak")) { $c = array(implode(",",$list));$list = $c;}
556+
548557
foreach($list as $c) {
549558
preg_match("/([^\(]+)?\((.*)\)/", $c, $m);
550559
if(isset($m[1])) {
@@ -566,7 +575,7 @@ function drugs()
566575
}
567576
if(trim($a[22])) {
568577
// ATC identifiers
569-
$list = $this->parseList(trim($a[22]));
578+
$list = $this->parseList(trim($a[22]),",");
570579
foreach($list as $c) {
571580
parent::addRDF(
572581
parent::triplify($id, parent::getVoc()."x-atc", "atc:".$c)
@@ -802,7 +811,7 @@ function variants()
802811

803812
function clinical_ann_metadata()
804813
{
805-
$header = array("Clinical Annotation Id","Location","Gene","Level of Evidence","Clinical Annotation Types","Genotype-Phenotype IDs","Annotation Text","Variant Annotations IDs","Variant Annotations","PMIDs","Evidence Count","Related Drugs","Related Diseases","Biogeographical groups", "Chromosome");
814+
$header = array("Clinical Annotation Id","Location","Gene","Level of Evidence","Clinical Annotation Types","Genotype-Phenotype IDs","Annotation Text","Variant Annotations IDs","Variant Annotations","PMIDs","Evidence Count","Related Chemicals","Related Diseases","Biogeographical groups", "Chromosome","Latest History");
806815
$this_header = explode("\t",$this->getReadFile()->read());
807816
if(count($this_header) != count($header)) {
808817
trigger_error("Change in the number of columns. Expected ".count($header).", but found ".count($this_header),E_USER_ERROR);
@@ -812,19 +821,8 @@ function clinical_ann_metadata()
812821
$a = explode("\t",$l);
813822

814823
$id = parent::getNamespace().$a[0];
815-
# fixing bad file formatting
816-
if($a[0] == "982040598" or $a[0] == "982037603") {
817-
$a[8] .= $a[11];
818-
$a[9] = $a[12];
819-
$a[10] = $a[13];
820-
$a[11]= $a[14];
821-
$a[12] = $a[15];
822-
$a[13] = $a[16];
823-
$a[14] = $a[17];
824-
}
825-
826-
827824
$label = "clinical genotype to phenotype annotations for ".$a[1];
825+
828826
// [0] => Clinical Annotation Id
829827
parent::addRDF(
830828
parent::describeIndividual($id, $label, parent::getVoc()."Clinical-Annotation").
@@ -879,17 +877,15 @@ function clinical_ann_metadata()
879877
// [5] => Genotype-Phenotypes IDs
880878
// [6] => Text
881879
if($a[5]) {
882-
$gps = explode('","',$a[5]);
883-
$gps_texts = explode('","',$a[6]);
880+
$gps = explode(';',$a[5]);
881+
$gps_texts = explode('; ',$a[6]);
884882
foreach($gps AS $i => $gp) {
885-
$gp = str_replace('"','',trim($gp));
886-
$gp_text = str_replace('"','',trim($gps_texts[$i]));
887-
$b = explode(":",$gp_text,2);
883+
$gp_text = str_replace('\\','',$gps_texts[$i]);
888884

889885
parent::addRDF(
890886
parent::describeIndividual(parent::getNamespace().$gp, $gp_text, parent::getVoc()."Genotype-Phenotype-Association").
891887
parent::triplify($id, parent::getVoc()."genotype_phenotype", parent::getNamespace().$gp).
892-
parent::triplifyString(parent::getNamespace().$gp, parent::getVoc()."genotype", trim($b[0])).
888+
parent::triplifyString(parent::getNamespace().$gp, parent::getVoc()."genotype", trim($gp)).
893889
parent::describeClass(parent::getVoc()."Genotype-Phenotype-Association", "PharmGKB Genotype Phenotype Association").
894890
parent::describeProperty(parent::getVoc()."genotype_phenotype", "Relationship between a PharmGKB entity and a Genotype Phenotype").
895891
parent::describeProperty(parent::getVoc()."genotype", "Relationship between a PharmGKB Genotype Phenotype and a genotype")
@@ -900,11 +896,14 @@ function clinical_ann_metadata()
900896
// [7] => Variant Annotations IDs
901897
// [8] => Variant Annotations
902898
if($a[7]) {
903-
$b = explode('","',$a[7]);
904-
$b_texts = explode('","',$a[8]);
899+
$b = explode(';',$a[7]);
900+
$b_texts = explode(';',$a[8]);
901+
if(count($b) != count($b_texts)) {
902+
trigger_error("Error in parsing variant annotations");
903+
exit();
904+
}
905905
foreach($b AS $i => $variant) {
906-
$variant = str_replace('"','',trim($variant));
907-
$variant_text = str_replace('"','',trim ($b_texts[$i]));
906+
$variant_text = trim($b_texts[$i]);
908907
parent::addRDF(
909908
parent::describeIndividual(parent::getNamespace().$variant, $variant_text, parent::getVoc()."Variant-Annotation").
910909
parent::triplify($id, parent::getVoc()."variant", parent::getNamespace().$variant)
@@ -914,7 +913,7 @@ function clinical_ann_metadata()
914913

915914
// [9] => PMIDs
916915
if($a[9]) {
917-
$b = $this->parseList($a[9]);
916+
$b = explode(';', $a[9]);
918917
foreach($b AS $i => $pmid) {
919918
parent::addRDF(
920919
parent::triplify($id, parent::getVoc()."article", "pubmed:".$pmid)
@@ -930,10 +929,9 @@ function clinical_ann_metadata()
930929
);
931930
}
932931

933-
// [11] => Related Drugs
932+
// [11] => Related Chemicals
934933
if($a[11]) {
935-
//print_r($a);exit;
936-
$b = $this->parseList($a[11]);
934+
$b = explode(';', $a[11]);
937935
foreach($b AS $drug_label) {
938936
preg_match('/\(PA(.*)\)/',$drug_label,$m);
939937

@@ -951,7 +949,7 @@ function clinical_ann_metadata()
951949
}
952950
// [12] => Related Diseases
953951
if($a[12]) {
954-
$b = $this->parseList($a[12]);
952+
$b = explode(';', $a[12]);
955953
foreach($b AS $disease_label) {
956954
preg_match('/\(PA(.*)\)/',$disease_label,$m);
957955
if(isset($m[1])) {

Diff for: sgd/sgd.php

+6 -6
Original file line number | Diff line number | Diff line change
@@ -126,8 +126,8 @@ function process(){
126126
"complex" => "curation/literature/go_protein_complex_slim.tab",
127127
"interaction" => "curation/literature/interaction_data.tab",
128128
"phenotype" => "curation/literature/phenotype_data.tab",
129-
"pathways" => "curation/literature/biochemical_pathways.tab",
130-
"mapping" => "mapping"
129+
"pathways" => "curation/literature/biochemical_pathways.tab"#,
130+
#"mapping" => "mapping"
131131
);
132132

133133
$graph_uri = parent::getGraphURI();
@@ -468,7 +468,7 @@ function features()
468468
);
469469
}
470470
if($a[1] == "ORF" && $a[4] != '') {
471-
$p2 = ucfirst(strtolower(str_replace(array("(",")"), array("%28","%29"), $a[4])))."p";
471+
$p2 = ucfirst(strtolower(str_replace(array("(",")",","), array("%28","%29","%2C"), $a[4])))."p";
472472
$p2label = "$p2";
473473
$this->AddRDF(
474474
parent::triplify($sid, $this->getVoc()."encodes", "sgd:$p2").
@@ -507,7 +507,7 @@ function features()
507507

508508
// common names
509509
if($a[4]) {
510-
$nid = str_replace(array("(",")"), array("%28","%29"), $a[4]);
510+
$nid = str_replace(array("(",")",","), array("%28","%29","%2C"), $a[4]);
511511
$this->AddRDF(
512512
parent::triplifyString($sid, $this->getVoc()."standardName", $a[4]).
513513
parent::triplify($sid, "owl:sameAs", "sgd:$nid").
@@ -526,7 +526,7 @@ function features()
526526
// parent feature
527527
$parent_type = '';
528528
if($a[6]) {
529-
$parent = str_replace(array("(",")"," "), array("%28","%29","_"), $a[6]);
529+
$parent = str_replace(array("(",")"," ",","), array("%28","%29","_","%2C"), $a[6]);
530530
$this->addRDF(
531531
parent::triplify($sid, $this->getVoc()."is-proper-part-of", $this->getRes().$parent).
532532
parent::describeProperty($this->getVoc()."is-proper-part-of", "Relationship between an SGD entity and an entity it is a proper part of")
@@ -1125,7 +1125,7 @@ function pathways(){
11251125

11261126
$eid = '';
11271127
if($a[3]) { // there is a protein
1128-
$eid = ucfirst(strtolower($a[3]))."p";
1128+
$eid = ucfirst(strtolower(str_replace(",","%2C",$a[3])))."p";
11291129
$this->AddRDF(
11301130
parent::triplify($this->getRes().$pid, $this->getVoc()."has-participant", $this->getRes().$eid)
11311131
);

0 commit comments

Comments
 (0)