@@ -39,28 +39,17 @@ function __construct($argv) {
39
39
parent ::__construct ($ argv , "sider " );
40
40
41
41
// set and print application parameters
42
- parent ::addParameter ('files ' ,true ,'all|label_mapping|adverse_effects_raw|indications_raw|meddra_freq_parsed ' ,'all ' ,'all or comma-separated list of ontology short names to process ' );
42
+ parent ::addParameter ('files ' ,true ,'all|indications|se|freq ' ,'all ' ,'all or comma-separated list of ontology short names to process ' );
43
43
parent ::addParameter ('download_url ' ,false ,null ,'http://sideeffects.embl.de/media/download/ ' );
44
44
45
45
parent ::initialize ();
46
46
}
47
47
48
48
function run () {
49
-
50
- if (parent ::getParameterValue ('download ' ) === true )
51
- {
52
- $ this ->download ();
53
- }
54
- if (parent ::getParameterValue ('process ' ) === true )
55
- {
56
- $ this ->process ();
57
- }
58
-
59
- }
60
-
61
- function download (){
62
49
$ idir = parent ::getParameterValue ('indir ' );
50
+ $ odir = parent ::getParameterValue ('outdir ' );
63
51
$ files = parent ::getParameterValue ('files ' );
52
+ $ dataset_description = '' ;
64
53
65
54
if ($ files == 'all ' ) {
66
55
$ files = explode ('| ' , parent ::getParameterList ('files ' ));
@@ -70,8 +59,11 @@ function download(){
70
59
}
71
60
72
61
foreach ($ files AS $ file ) {
73
- $ lfile = $ idir .$ file .'.tsv.gz ' ;
74
- $ rfile = parent ::getParameterValue ('download_url ' ).$ file .'.tsv.gz ' ;
62
+ $ f = $ file ;
63
+ if ($ file != "freq " ) $ f = "all_ " .$ file ;
64
+ $ f = "meddra_ " .$ f .".tsv.gz " ;
65
+ $ lfile = $ idir .$ f ;
66
+ $ rfile = parent ::getParameterValue ('download_url ' ).$ f ;
75
67
if (!file_exists ($ lfile ) || parent ::getParameterValue ('download ' ) == 'true ' ) {
76
68
echo "downloading $ file... " ;
77
69
$ ret = file_get_contents ($ rfile );
@@ -86,43 +78,15 @@ function download(){
86
78
}
87
79
echo "done! " .PHP_EOL ;
88
80
}
89
- }//foreach
90
- }
91
-
92
- function process (){
93
-
94
- $ idir = parent ::getParameterValue ('indir ' );
95
- $ odir = parent ::getParameterValue ('outdir ' );
96
- $ files = parent ::getParameterValue ('files ' );
97
-
98
- if ($ files == 'all ' ) {
99
- $ files = explode ('| ' , parent ::getParameterList ('files ' ));
100
- array_shift ($ files );
101
- } else {
102
- $ files = explode (', ' , parent ::getParameterValue ('files ' ));
103
- }
104
-
105
- parent ::setCheckpoint ('dataset ' );
106
-
107
- $ dataset_description = '' ;
108
-
109
- $ graph_uri = parent ::getGraphURI ();
110
- if (parent ::getParameterValue ('dataset_graph ' ) == true ) parent ::setGraphURI (parent ::getDatasetURI ());
111
81
112
- foreach ($ files AS $ file ) {
113
- $ lfile = $ idir .$ file .'.tsv.gz ' ;
114
- $ rfile = parent ::getParameterValue ('download_url ' ).$ file .'.tsv.gz ' ;
115
-
116
- echo "Processing $ file... " ;
82
+ echo "Processing $ f... " ;
117
83
parent ::setReadFile ($ lfile ,true );
118
84
119
85
$ suffix = parent ::getParameterValue ('output_format ' );
120
86
$ ofile = "sider- " .$ file .'. ' .$ suffix ;
121
87
$ gz = false ;
122
88
123
- if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) {
124
- $ gz = true ;
125
- }
89
+ if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) $ gz = true ;
126
90
127
91
parent ::setWriteFile ($ odir .$ ofile , $ gz );
128
92
$ this ->$ file ();
@@ -320,43 +284,83 @@ function GetPCFromStereo($id)
320
284
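Format (6 tab-separated columns): STITCH flat compound id, STITCH stereo compound id, UMLS concept id (as found on the label), MedDRA term type (e.g. LLT), UMLS concept id of the MedDRA term, name of side effect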
321
285
*/
322
286
323
/**
 * Convert the side-effect file currently opened for reading into RDF.
 *
 * Every line is split on tabs and must contain exactly six columns:
 *   0: STITCH flat identifier     (prefixed with "stitch:")
 *   1: STITCH stereo identifier   (prefixed with "stitch:")
 *   2: UMLS concept identifier    (prefixed with "umls:")
 *   3: term type                  (rows of type 'LLT' are skipped)
 *   4: concept identifier of the term (not used here)
 *   5: term label                 (lower-cased and trimmed before use)
 *
 * For each retained row one "Drug-Side-Effect" individual is emitted and
 * linked to the concept and to both STITCH identifiers. A concept is
 * described as a class only the first time it is seen.
 *
 * A row with a different column count raises E_USER_ERROR and stops the
 * script.
 */
function se()
{
    // Concepts already described as classes, keyed by the prefixed CUI.
    $declared = array();

    parent::setCheckpoint('file');
    while ($l = $this->getReadFile()->Read()) {
        $a = explode("\t", $l);
        if (count($a) != 6) {
            trigger_error("Expecting 6 columns, found ".count($a)." instead.", E_USER_ERROR);
            // Only reached when a custom error handler returns instead of halting.
            exit;
        }

        $stitch_flat     = "stitch:".$a[0];
        $stitch_stereo   = "stitch:".$a[1];
        $cui             = "umls:".$a[2];
        $term_type       = $a[3];
        // $a[4] (concept identifier of the term) is intentionally not read.
        $term_type_label = $a[5];

        // Rows of term type 'LLT' are not converted.
        if ($term_type == 'LLT') {
            continue;
        }

        // One resource per (drug, concept) pair; identical pairs hash to the same id.
        $id = "sider:".md5("se".$stitch_flat.$cui);

        // The label is the last column of the line, so trim() also drops the line ending.
        $cui_label = strtolower(trim($term_type_label));
        if (!isset($declared[$cui])) {
            parent::addRDF(
                parent::describeClass($cui, $cui_label)
            );
            $declared[$cui] = '';
        }

        parent::addRDF(
            parent::describeIndividual($id, "$stitch_flat $cui_label side effect", parent::getVoc()."Drug-Side-Effect").
            parent::triplify($id, parent::getVoc()."side-effect", $cui).
            parent::triplify($id, parent::getVoc()."stitch-flat", $stitch_flat).
            parent::triplify($id, parent::getVoc()."stitch-stereo", $stitch_stereo)
        );
        parent::setCheckpoint('record');
    }

    parent::setCheckpoint('file');
}
341
328
342
- function indications_raw ()
329
+ function indications ()
343
330
{
344
331
$ declared = null ;
345
-
332
+ $ list = null ;
346
333
parent ::setCheckpoint ('file ' );
347
- while ($ l = $ this ->GetReadFile ()->Read ()) {
334
+ while ($ l = $ this ->getReadFile ()->Read ()) {
348
335
parent ::setCheckpoint ('record ' );
349
336
350
337
$ a = explode ("\t" ,$ l );
351
- $ id = "sider: " .urlencode ($ a [0 ]);
352
- $ cui = "umls: " .$ a [1 ];
353
- $ cui_label = strtolower (trim ($ a [2 ]));
338
+ list ($ stitch_flat ,$ cui ,$ provenance ,$ cui_label ,$ term_type ,$ term_cui ,$ term_cui_label ) = $ a ;
339
+ $ id = "sider: " .md5 ("i " .$ stitch_flat .$ cui );
340
+
341
+ if ($ term_type == "LLT " or isset ($ list [$ id ])) continue ;
342
+ if (!isset ($ list [$ id ])) {
343
+ $ list [$ id ] = '' ;
344
+ }
345
+
346
+
347
+ $ stitch_id = "stitch: $ stitch_flat " ;
348
+ $ meddra_id = "meddra: $ cui " ;
349
+
350
+ if (!isset ($ declared [$ cui ])) {
351
+ parent ::addRDF (
352
+ parent ::describeClass ($ meddra_id , $ cui_label )
353
+ );
354
+ $ declared [$ cui ] = '' ;
355
+ }
354
356
355
357
parent ::addRDF (
356
- parent ::describeClass ($ cui , $ cui_label ).
357
- parent ::triplify ($ id , parent ::getVoc ()."indication " , $ cui )
358
+ parent ::describeIndividual ($ id , $ stitch_id ." - " .$ meddra_id ." indication " , parent ::getVoc ()."Drug-Indication-Association " ).
359
+ parent ::describeClass (parent ::getVoc ()."Drug-Indication-Association " ,"Drug-Disease Association " ).
360
+ parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_id ).
361
+ parent ::triplify ($ id , parent ::getVoc ()."indication " , $ meddra_id ).
362
+ parent ::triplifyString ($ id , parent ::getVoc ()."provenance " , $ provenance )
358
363
);
359
- parent ::setCheckpoint ('record ' );
360
364
361
365
}
362
366
parent ::setCheckpoint ('file ' );
@@ -384,30 +388,26 @@ function indications_raw()
384
388
matches the upper bound. Due to the nature of the data, there can be more than one frequency for the same label,
385
389
e.g. from different clinical trials or for different levels of severeness.
386
390
*/
387
- function meddra_freq_parsed ()
391
+ function freq ()
388
392
{
389
- $ cols = 12 ;
393
+ $ cols = 10 ;
390
394
$ i = 1 ;
391
395
parent ::setCheckpoint ('file ' );
392
396
while ($ l = parent ::getReadFile ()->read ()) {
393
397
parent ::setCheckpoint ('record ' );
394
-
395
398
$ a = explode ("\t" ,str_replace ("% " ,"" ,$ l ));
396
399
if (count ($ a ) != $ cols ) {
397
- trigger_error ("Expecting $ cols, but found " .count ($ a )." instead... skipping file! " );
400
+ trigger_error ("Expecting $ cols, but found " .count ($ a )." instead... skipping file! " , E_USER_ERROR );
398
401
return false ;
399
402
}
400
- $ label = $ a [2 ];
401
- $ label_id = parent ::getNamespace ().urlencode ($ label );
402
- $ effect_id = "umls: " .$ a [3 ];
403
-
404
- $ id = parent ::getRes ().md5 ($ a [2 ].$ a [3 ].$ a [6 ]);
405
- $ label = "$ a [4 ] in $ label $ a [2 ]" ;
403
+ list ($ stitch_flat , $ stitch_stereo , $ cui , $ placebo , $ freq , $ freq_lower , $ freq_upper , $ concept_type , $ meddra_concept_id , $ meddra_concept_label );
404
+ $ id = "stitch_resource: " .md5 ("se_freq " .$ l );
405
+ $ label = "side effect frequency of $ meddra_concept_label for $ stitch_id " ;
406
406
parent ::addRDF (
407
- parent ::describeIndividual ($ id , $ label , parent ::getVoc ()."Drug-Effect " ).
408
- parent ::describeClass (parent ::getVoc ()."Drug-Effect " ,"SIDER Drug-Effect " ).
409
- parent ::triplify ($ id , parent ::getVoc ()."drug " , $ label_id ).
410
- parent ::triplify ($ id , parent ::getVoc ()."effect " , $ effect_id )
407
+ parent ::describeIndividual ($ id , $ label , parent ::getVoc ()."Drug-Effect-Frequency " ).
408
+ parent ::describeClass (parent ::getVoc ()."Drug-Effect-Frequency " ,"SIDER Drug-Effect and Frequency " ).
409
+ parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_flat ).
410
+ parent ::triplify ($ id , parent ::getVoc ()."effect " , " meddra: " . $ meddra_concept_id )
411
411
);
412
412
413
413
if ($ a [5 ]){
@@ -416,25 +416,25 @@ function meddra_freq_parsed()
416
416
);
417
417
}
418
418
419
- $ fid = $ id .md5 ($ a [5 ].$ a [6 ].$ a [7 ].$ a [8 ]);
420
- // $fid = $id.($i++);
421
- $ flabel = $ a [6 ];
422
- $ ftype = parent ::getVoc ().ucfirst ($ a [6 ])."-Frequency " ;
423
419
$ number = false ;
424
- if (is_numeric ($ a [ 6 ] )) {
425
- $ flabel = $ a [ 6 ] ."% " ;
426
- $ ftype_label = "Specified -Frequency " ;
420
+ if (is_numeric ($ freq )) {
421
+ $ flabel = $ freq ."% " ;
422
+ $ ftype_label = "Exact -Frequency " ;
427
423
$ ftype = parent ::getVoc ().$ ftype_label ;
428
424
$ number = true ;
425
+ } else {
426
+ $ flabel = $ freq ;
427
+ $ ftype_label = "Qualitative-Frequency " ;
428
+ $ ftype = parent ::getVoc ()."$ ftype_label;
429
429
}
430
- if ($ a [ 7 ] != $ a [ 8 ] ) {
431
- $ flabel .= "( $ a [ 7 ] - $ a [ 8 ] ) " ;
430
+ if( $ freq_lower != $ freq_upper ) {
431
+ $ flabel .= " ($ freq_lower - $ freq_upper )";
432
432
$ ftype_label = "Range-Frequency " ;
433
433
$ ftype = parent ::getVoc ().$ ftype_label ;
434
- }
434
+ }
435
435
436
436
parent ::addRDF (
437
- parent ::triplify ($ id ,parent ::getVoc ()."reported-frequency " ,$ fid ).
437
+ parent ::triplify ($ id ,parent ::getVoc ()."AQualitative-Frequency " ,$ fid ).
438
438
parent ::describeIndividual ($ fid ,$ flabel ,$ ftype ).
439
439
parent ::describeClass ($ ftype , $ ftype_label )
440
440
);
0 commit comments