@@ -39,7 +39,7 @@ class PharmGKBParser extends Bio2RDFizer
39
39
40
40
function __construct ($ argv ) {
41
41
parent ::__construct ($ argv , "pharmgkb " );
42
- $ this ->AddParameter ('files ' ,true ,'all|drugs|genes|diseases|relationships|pathways| rsid|variant_annotations|offsides|twosides ' ,'all ' ,'all or comma-separated list of files to process ' );
42
+ $ this ->AddParameter ('files ' ,true ,'all|drugs|genes|diseases|relationships|rsid|variant_annotations|offsides|twosides ' ,'all ' ,'all or comma-separated list of files to process ' ); /** pathways **/
43
43
$ this ->AddParameter ('download_url ' ,false ,null ,'http://www.pharmgkb.org/commonFileDownload.action?filename= ' );
44
44
parent ::initialize ();
45
45
}
@@ -155,9 +155,9 @@ function process(){
155
155
$ zipentries = array ('clinical_ann_metadata.tsv ' ,'var_drug_ann.tsv ' ,'var_pheno_ann.tsv ' ,'var_fa_ann.tsv ' ); //'study_parameters.tsv'
156
156
else if ($ file == "pathways " ) {
157
157
for ( $ i = 0 ; $ i < $ zin ->numFiles ; $ i ++ ){
158
- $ stat = $ zin ->statIndex ( $ i );
159
- $ entry = $ stat ['name ' ];
160
- $ ext = pathinfo ($ entry , PATHINFO_EXTENSION );
158
+ $ stat = $ zin ->statIndex ( $ i );
159
+ $ entry = $ stat ['name ' ];
160
+ $ ext = pathinfo ($ entry , PATHINFO_EXTENSION );
161
161
if ($ ext != "txt " ){
162
162
$ zipentries [] = $ entry ;
163
163
}
@@ -191,17 +191,19 @@ function process(){
191
191
if ($ zipentry == "clinical_ann_metadata.tsv " ) $ fnx = "clinical_ann_metadata " ;
192
192
else $ fnx = 'variant_annotation ' ;
193
193
echo "processing $ zipentry.. " ;
194
+ } else if ($ file == 'pathways ' ) {
195
+ $ fnx = 'pathways ' ;
196
+ echo "processing $ fnx ( $ zipentry)... " ;
194
197
} else {
195
198
$ fnx = $ file ;
196
- echo "processing $ fnx... " ;
199
+ echo "processing $ fnx ... " ;
197
200
}
198
201
199
202
$ this ->$ fnx ();
200
203
parent ::writeRDFBufferToWriteFile ();
201
204
echo "done! " .PHP_EOL ;
202
205
203
206
// generate the dataset release file
204
- echo "Generating dataset description... " ;
205
207
$ source_file = (new DataResource ($ this ))
206
208
->setURI ($ rfile )
207
209
->setTitle ("Pharmacogenomics Knowledge Base ( $ zipentry) " )
@@ -240,7 +242,8 @@ function process(){
240
242
}
241
243
$ this ->GetWriteFile ()->Close ();
242
244
} // foreach
243
-
245
+
246
+ echo "Generating dataset description... " ;
244
247
parent ::setGraphURI ($ graph_uri );
245
248
parent ::setWriteFile ($ odir .parent ::getBio2RDFReleaseFile ());
246
249
parent ::getWriteFile ()->write ($ dataset_description );
@@ -259,16 +262,20 @@ function process(){
259
262
5 Alternate Names
260
263
6 Alternate Symbols
261
264
7 Is Genotyped
262
- 9 Is VIP
263
- 12 Has Variant Annotation
265
+ 8 Is VIP
266
+ 9 Has Variant Annotation
267
+ 10 Has CPIC Dosing Guideline
264
268
*/
265
269
function genes ()
266
270
{
267
- if (($ n = count (explode ("\t" ,$ this ->GetReadFile ()->Read ()))) != 10 ) {
268
- trigger_error ("Found $ n columns in gene file - expecting 10! " , E_USER_WARNING );
271
+ $ h = explode ("\t" ,parent ::getReadFile ()->read ());
272
+ $ expected_columns = 11 ;
273
+ if (($ n = count ($ h )) != $ expected_columns ) {
274
+ trigger_error ("Found $ n columns in gene file - expecting $ expected_columns! " , E_USER_WARNING );
275
+ return false ;
269
276
}
270
277
271
- while ($ l = $ this -> GetReadFile ()->Read (200000 )) {
278
+ while ($ l = parent :: getReadFile ()->read (200000 )) {
272
279
$ a = explode ("\t" ,$ l );
273
280
274
281
$ id = parent ::getNamespace ().$ a [0 ];
@@ -401,32 +408,26 @@ function MapXrefs($xref, &$url = false)
401
408
}
402
409
return $ ns .": " .$ id ;
403
410
}
404
- /*
405
- 0 PharmGKB Accession Id
406
- 1 Name
407
- 2 Generic Names
408
- 3 Trade Names
409
- 4 Brand Mixtures
410
- 5 Type
411
- 6 Cross References
412
- 7 SMILES
413
- 8 External Vocabulary
414
-
415
- 0 PA164748388
416
- 1 diphemanil methylsulfate
417
- 2
418
- 3 Prantal
419
- 4
420
- 5 Drug/Small Molecule
421
- 6 drugBank:DB00729,pubChemCompound:6126,pubChemSubstance:149020
422
- 7
423
- 8 ATC:A03AB(Synthetic anticholinergics, quaternary ammonium compounds)
424
-
425
- */
411
+ /*
412
+ [0] => PharmGKB Accession Id
413
+ [1] => Name
414
+ [2] => Generic Names
415
+ [3] => Trade Names
416
+ [4] => Brand Mixtures
417
+ [5] => Type
418
+ [6] => Cross-references
419
+ [7] => SMILES
420
+ [8] => Dosing Guideline
421
+ [9] => External Vocabulary
422
+ */
426
423
function drugs ()
427
424
{
428
425
$ declared = '' ;
429
- $ this ->GetReadFile ()->Read (1000 ); // first line is header
426
+ $ h = explode ("\t" ,$ this ->GetReadFile ()->Read (1000 )); // first line is header
427
+ if (count ($ h ) != 10 ) {
428
+ trigger_error ("Change in number of columns for drugs file " ,E_USER_ERROR );
429
+ return FALSE ;
430
+ }
430
431
while ($ l = $ this ->GetReadFile ()->Read (200000 )) {
431
432
$ a = explode ("\t" ,$ l );
432
433
$ id = parent ::getNamespace ().$ a [0 ];
@@ -503,17 +504,17 @@ function drugs()
503
504
}
504
505
}
505
506
}
506
- if (trim ($ a [8 ])) {
507
+ if (trim ($ a [9 ])) {
507
508
// External Vocabulary
508
509
// ATC:H01AC(Somatropin and somatropin agonists),ATC:V04CD(Tests for pituitary function)
509
510
// ATC:D07AB(Corticosteroids, moderately potent (group II)) => this is why you don't use brackets and commas as separators.
510
- $ b = explode (', ' ,trim ($ a [8 ]),2 );
511
+ $ b = explode (', ' ,trim ($ a [9 ]),2 );
511
512
foreach ($ b as $ c ) {
512
513
preg_match_all ("/ATC:([A-Z0-9]+)\((.*)\)$/ " ,$ c ,$ m );
513
514
if (isset ($ m [1 ][0 ])) {
514
515
$ atc = "atc: " .$ m [1 ][0 ];
515
516
parent ::addRDF (
516
- parent ::triplify ($ id , parent ::getVoc ()."xref " , $ atc )
517
+ parent ::triplify ($ id , parent ::getVoc ()."x-atc " , $ atc )
517
518
);
518
519
if (!isset ($ declared [$ atc ])) {
519
520
$ declared [$ atc ] = '' ;
@@ -524,18 +525,25 @@ function drugs()
524
525
}
525
526
}
526
527
}
528
+ parent ::WriteRDFBufferToWriteFile ();
527
529
}
528
- parent ::WriteRDFBufferToWriteFile ();
529
530
}
530
531
531
- /*
532
- 0 PharmGKB Accession Id
533
- 1 Name
534
- 2 Alternate Names
535
- */
532
+ /*
533
+ [0] => PharmGKB Accession Id
534
+ [1] => Name
535
+ [2] => Alternate Names
536
+ [3] => Cross-references
537
+ [4] => External Vocabulary
538
+ */
536
539
function diseases ()
537
540
{
538
- $ this ->GetReadFile ()->Read (10000 );
541
+ $ h = explode ("\t" ,$ this ->GetReadFile ()->Read (10000 )); // first line is header
542
+ if (count ($ h ) != 5 ) {
543
+ trigger_error ("Change in number of columns for diseases file " ,E_USER_ERROR );
544
+ return FALSE ;
545
+ }
546
+
539
547
while ($ l = $ this ->GetReadFile ()->Read (10000 )) {
540
548
$ a = explode ("\t" ,$ l );
541
549
@@ -571,24 +579,24 @@ function diseases()
571
579
parent ::addRDF (
572
580
parent ::triplify ($ id , "owl:sameAs " , $ sameID )
573
581
);
574
- if (isset ($ a [4 ]) && trim ($ a [4 ]) != '' ) {
575
- $ d = preg_match_all ('/(MeSH|SnoMedCT|UMLS ):([A-Z0 -9]+)\(([^\)]+)\)/ ' ,$ a [4 ],$ m , PREG_SET_ORDER );
582
+ if (isset ($ a [4 ]) && trim ($ a [4 ]) != '' ) {
583
+ $ d = preg_match_all ('/[,]?([^\:]+ ):([A-Za-z0 -9]+)\(([^\)]+)\)/ ' ,$ a [4 ],$ m , PREG_SET_ORDER );
576
584
foreach ($ m AS $ n ) {
577
- $ n [1 ] = strtolower ($ n [1 ]);
578
- if ($ n [1 ] == 'snomedct ' ) $ n [1 ] = 'snomed ' ;
579
- $ id2 = $ n [1 ].': ' .$ n [2 ];
580
- parent ::addRDF (
581
- parent ::triplify ($ id , "rdfs:seeAlso " , $ id2 )
582
- );
583
- if (isset ($ n [3 ]) && $ n [2 ] != $ n [3 ]){
585
+ if (isset ($ n [1 ]) && isset ($ n [2 ]) && !strstr ($ n [1 ]," " )) {
586
+ $ id2 = $ n [1 ].': ' .$ n [2 ];
584
587
parent ::addRDF (
585
- parent ::triplifyString ( $ id2 , "rdfs:label " , str_replace ( array ( " \' " , "\"" ), array ( "\\ \' " , "" ), $ n [ 3 ]) )
588
+ parent ::triplify ( $ id , "pharmgkb_vocabulary:xref " , $ id2 )
586
589
);
590
+ if (isset ($ n [3 ]) && $ n [2 ] != $ n [3 ]){
591
+ parent ::addRDF (
592
+ parent ::triplifyString ($ id2 , "rdfs:label " , str_replace (array ("\' " , "\"" ),array ("\\\' " , "" ),$ n [3 ]))
593
+ );
594
+ }
587
595
}
588
- }
596
+ }
589
597
}
598
+ parent ::writeRDFBufferToWriteFile ();
590
599
}
591
- parent ::writeRDFBufferToWriteFile ();
592
600
}
593
601
594
602
/*
@@ -608,7 +616,12 @@ function diseases()
608
616
function variant_annotations ()
609
617
{
610
618
$ hash = '' ; // md5 hash list
611
- $ this ->GetReadFile ()->Read ();
619
+ $ h = explode ("\t" ,$ this ->GetReadFile ()->Read (100000 )); // first line is header
620
+ if (count ($ h ) != 12 ) {
621
+ triger_error ("Change in number of columns for variant annotations file " ,E_USER_ERROR );
622
+ return FALSE ;
623
+ }
624
+
612
625
while ($ l = $ this ->GetReadFile ()->Read (10000 )) {
613
626
$ a = explode ("\t" ,$ l );
614
627
$ id = parent ::getNamespace ().$ a [11 ];
@@ -736,36 +749,36 @@ function variant_annotations()
736
749
}
737
750
738
751
/*
739
- Entity1_id - PA267, rs5186, Haplotype for PA121
740
- Entity1_type - Drug, Gene, VariantLocation, Disease, Haplotype, Association
741
- Entity2_id - PA267, rs5186, Haplotype for PA121
742
- Entity2_type - Drug, Gene, VariantLocation, Disease, Haplotype, Association
743
- Evidence - VariantAnnotation, Pathway, VIP, ClinicalAnnotation, DosingGuideline, DrugLabel, Annotation
744
- Evidence Sources - Publication
745
- Pharmacodynamic - Y
746
- Pharmacokinetic - Y
747
-
748
- Entity1_id Entity1_type Entity2_id Entity2_type Evidence Association PK PD PMIDs
749
- PA445738 Disease PA134866404 Gene VariantAnnotation associated PD 21912425
750
-
752
+ 0 Entity1_id - PA267, rs5186, Haplotype for PA121
753
+ 1 Entity1_name
754
+ 2 Entity1_type - Drug, Gene, VariantLocation, Disease, Haplotype, Association
755
+ 3 Entity2_id - PA267, rs5186, Haplotype for PA121
756
+ 4 Entity2_name
757
+ 5 Entity2_type - Drug, Gene, VariantLocation, Disease, Haplotype, Association
758
+ 6 Evidence - VariantAnnotation, Pathway, VIP, ClinicalAnnotation, DosingGuideline, DrugLabel, Annotation
759
+ 7 Association
760
+ 8 Pharmacokinetic - Y
761
+ 9 Pharmacodynamic - Y
762
+ 10 PMIDS
751
763
*/
752
764
function relationships ()
753
765
{
754
766
$ declared = '' ;
755
767
$ hash = '' ; // md5 hash list
756
- $ this ->GetReadFile ()->Read ();
768
+ $ h = explode ("\t" , $ this ->GetReadFile ()->Read ());
769
+ if (count ($ h ) != 11 ) {
770
+ trigger_error ("Change in number of columns for relationships file (again) " );
771
+ return FALSE ;
772
+ }
773
+
757
774
while ($ l = $ this ->GetReadFile ()->Read (10000 )) {
758
775
$ a = explode ("\t" ,$ l );
759
- if (count ($ a ) != 9 ) {
760
- trigger_error ("Change in number of columns for relationships file " );
761
- return FALSE ;
762
- }
763
776
764
777
// id1
765
778
$ ns1 = parent ::getNamespace ();
766
779
$ id1 = $ a [0 ];
767
780
$ id1 = str_replace (" " ,"_ " ,$ id1 );
768
- $ type1 = $ a [1 ];
781
+ $ type1 = $ a [2 ];
769
782
if ($ id1 [0 ] == 'r ' ) {
770
783
$ ns1 = 'dbsnp: ' ;
771
784
} else if ($ id1 [0 ] == 'H ' ) {
@@ -774,9 +787,9 @@ function relationships()
774
787
775
788
// id2
776
789
$ ns2 = parent ::getNamespace ();
777
- $ id2 = $ a [2 ];
790
+ $ id2 = $ a [3 ];
778
791
$ id2 = str_replace (" " ,"_ " ,$ id2 );
779
- $ type2 = $ a [3 ];
792
+ $ type2 = $ a [5 ];
780
793
if ($ id2 [0 ] == 'r ' ) {
781
794
$ ns2 = 'dbsnp: ' ;
782
795
} else if ($ id2 [0 ] == 'H ' ) {
@@ -791,15 +804,15 @@ function relationships()
791
804
$ id = parent ::getRes ()."association_ " .$ id1 ."_ " .$ id2 ;
792
805
$ association = $ type1 .' ' .$ type2 .' Association ' ;
793
806
parent ::addRDF (
794
- parent ::describeIndividual ($ id , $ assocation , parent ::getVoc ().strtolower ($ type1 )."- " .strtolower ($ type2 )."-Association " ).
807
+ parent ::describeIndividual ($ id , $ association , parent ::getVoc ().strtolower ($ type1 )."- " .strtolower ($ type2 )."-Association " ).
795
808
parent ::triplify ($ id , parent ::getVoc ().strtolower ($ type1 ), $ ns1 .$ id1 ).
796
809
parent ::triplify ($ id , parent ::getVoc ().strtolower ($ type2 ), $ ns2 .$ id2 ).
797
810
parent ::describeClass (parent ::getVoc ().strtolower ($ type1 )."- " .strtolower ($ type2 )."-Association " , "PharmGKB $ type1 $ type2 Association " ).
798
811
parent ::describeProperty (parent ::getVoc ().strtolower ($ type1 ), "Relationship between a PharmGKB association and a $ type1 " ).
799
812
parent ::describeProperty (parent ::getVoc ().strtolower ($ type2 ), "Relationship between a PharmGKB association and a $ type2 " )
800
813
);
801
814
802
- $ b = explode (', ' ,$ a [4 ]);
815
+ $ b = explode (', ' ,$ a [7 ]);
803
816
foreach ($ b AS $ c ) {
804
817
parent ::addRDF (
805
818
parent ::triplifyString ($ id , parent ::getVoc ()."association_type " , $ c )
@@ -809,27 +822,27 @@ function relationships()
809
822
parent ::describeProperty (parent ::getVoc ()."association_type " , "Relationship between a PharmGKB association and its type " )
810
823
);
811
824
812
- if ($ a [6 ]){
825
+ if ($ a [8 ]){
813
826
parent ::addRDF (
814
827
parent ::triplifyString ($ id , parent ::getVoc ()."pk_relationship " , "true " )
815
828
);
816
829
}
817
- if ($ a [7 ]){
830
+ if ($ a [9 ]){
818
831
parent ::addRDF (
819
832
parent ::triplifyString ($ id , parent ::getVoc ()."pd_relationship " , "true " )
820
833
);
821
834
}
822
- $ a [8 ] = trim ($ a [8 ]);
823
- if ($ a [8 ]) {
824
- $ b = explode (', ' ,$ a [8 ]);
835
+ $ a [10 ] = trim ($ a [10 ]);
836
+ if ($ a [10 ]) {
837
+ $ b = explode ('; ' ,$ a [10 ]);
825
838
foreach ($ b AS $ pubmed_id ) {
826
839
parent ::addRDF (
827
840
parent ::triplify ($ id , parent ::getVoc ()."article " , "pubmed: " .$ pubmed_id )
828
841
);
829
842
}
830
843
}
844
+ parent ::writeRDFBufferToWriteFile ();
831
845
}
832
- parent ::writeRDFBufferToWriteFile ();
833
846
}
834
847
835
848
@@ -1231,10 +1244,10 @@ function variant_annotation()
1231
1244
1232
1245
function pathways ()
1233
1246
{
1247
+ /** @todo changed completely */
1234
1248
$ entry = false ;
1235
1249
while ($ l = $ this ->GetReadFile ()->Read (20000 )) {
1236
1250
$ a = explode ("\t" ,trim ($ l ));
1237
- print_r ($ a );
1238
1251
if (strlen (trim ($ l )) == 0 ) {
1239
1252
// end of entry
1240
1253
$ entry = false ;
@@ -1273,8 +1286,8 @@ function pathways()
1273
1286
parent ::describeProperty (parent ::getVoc ()."chemical " , "Relationship between a PharmGKB entity and a chemical " )
1274
1287
);
1275
1288
}
1289
+ parent ::writeRDFBufferToWriteFile ();
1276
1290
}
1277
- parent ::writeRDFBufferToWriteFile ();
1278
1291
}
1279
1292
1280
1293
/*
0 commit comments