@@ -108,8 +108,6 @@ function run() {
108
108
->setLicense ("http://creativecommons.org/licenses/by-nc-sa/3.0/ " )
109
109
->setDataset ("http://identifiers.org/sider.effect/ " );
110
110
111
- if ($ file == "label_mapping " ) $ source_file ->setLicense ("http://creativecommons.org/publicdomain/zero/1.0/ " );
112
-
113
111
$ prefix = parent ::getPrefix ();
114
112
$ bVersion = parent ::getParameterValue ('bio2rdf_release ' );
115
113
$ date = parent ::getDate (filemtime ($ odir .$ ofile ));
@@ -141,131 +139,8 @@ function run() {
141
139
142
140
//reset graph URI to default value
143
141
parent ::setGraphURI ($ graph_uri );
144
-
145
142
}
146
- /*
147
- 1 & 2: generic and brand names
148
-
149
- 3: a marker if the drug could be successfully mapped to STITCH. Possible values:
150
- - [empty field]: success
151
- - combination: two or more drugs were combined
152
- - not found: could not find the name in the database
153
- - mapping conflict: the available names point to two different compounds
154
- - template: a package insert that contains information for a group of related drugs
155
-
156
- 4 & 5: STITCH compound ids, based on PubChem. Salt forms and stereo-isomers have been merged.
157
- Column 4: "flat compound", i.e. stereo-isomers have been merged into one compound
158
- Column 5: stereo-specific compound id
159
-
160
- To get the PubChem Compound Ids: take absolute value, for flat compounds ids: subtract 100000000
161
- E.g. aspirin: -100002244 --> 2244
162
-
163
- 6: URL of the downloaded PDF. This column is empty for FDA SPL labels, which are available in XML.
164
- Unfortunately, many links have become stale since the labels were downloaded in 2009.
165
-
166
- 7: label identifier
167
- */
168
- function label_mapping ()
169
- {
170
- parent ::setCheckpoint ('file ' );
171
-
172
- $ declared = null ;
173
- while ($ l = parent ::getReadFile ()->Read (1000000 )) {
174
- parent ::setCheckpoint ('record ' );
175
-
176
- $ a = explode ("\t" ,$ l );
177
- $ id = parent ::getNamespace ().urlencode (trim ($ a [6 ]));
178
-
179
- $ gnames_list = explode ("; " ,strtolower (trim ($ a [1 ])));
180
- array_unique ($ gnames_list );
181
- asort ($ gnames_list );
182
- $ gnames = implode (" + " ,$ gnames_list );
183
- if ($ a [2 ] == "combination " ) {
184
- $ label = "combination: $ gnames " ;
185
- $ type = "Combination-Drug " ;
186
- } else {
187
- if ($ a [0 ]) $ label .= $ a [0 ]." ( " .$ gnames .") " ;
188
- else $ label = $ gnames ;
189
- $ type = "Drug " ;
190
- }
191
-
192
- parent ::addRDF (
193
- parent ::describeIndividual ($ id , $ label , parent ::getVoc ().$ type ).
194
- parent ::describeClass (parent ::getVoc ().$ type ,"SIDER " .$ type )
195
- );
196
-
197
- // attempt to extract the spl id
198
- $ b = explode ("_ " ,trim ($ a [6 ]));
199
- if (isset ($ b [1 ])) {
200
- $ c = explode ("- " ,$ b [1 ]);
201
- if (count ($ c ) == 5 ) {
202
- // possibly an SPL id
203
- parent ::addRDF (parent ::triplify ($ id ,parent ::getVoc ()."x-spl " ,"dailymed: " .$ b [1 ]));
204
- }
205
- }
206
-
207
- if (trim ($ a [0 ])) {
208
- $ brand_label = strtolower (trim ($ a [0 ]));
209
- $ brand_qname = parent ::getRes ().md5 ($ brand_label );
210
- parent ::addRDF (
211
- parent ::describeIndividual ($ brand_qname , $ brand_label , parent ::getVoc ()."Brand-Drug-Name " ).
212
- parent ::describeClass (parent ::getVoc ()."Brand-Drug-Name " ,"Brand Drug Name " ).
213
- parent ::triplify ($ id , parent ::getVoc ()."brand-name " , $ brand_qname )
214
- );
215
- }
216
- if (trim ($ a [1 ])) {
217
- foreach ($ gnames_list AS $ generic_name ) {
218
- $ generic_label = trim ($ generic_name );
219
- $ generic_qname = parent ::getRes ().md5 ($ generic_label );
220
- parent ::addRDF (
221
- parent ::describeIndividual ($ generic_qname , $ generic_label , parent ::getVoc ()."Generic-Drug-Name " ).
222
- parent ::describeClass (parent ::getVoc ()."Generic-Drug-Name " ,"Generic Drug Name " ).
223
- parent ::triplify ($ id , parent ::getVoc ()."generic-name " , $ generic_qname )
224
- );
225
- }
226
- }
227
-
228
- if ($ a [2 ]){
229
- $ mapping_result = str_replace (" " ,"- " ,$ a [2 ]);
230
- parent ::addRDF (
231
- parent ::triplify ($ id , parent ::getVoc ()."mapping-result " , parent ::getVoc ().$ mapping_result )
232
- );
233
- }
234
-
235
- if ($ a [3 ]){
236
- parent ::addRDF (
237
- parent ::triplify ($ id , parent ::getVoc ()."stitch-flat-compound-id " , "stitch: " .$ a [3 ])
238
- );
239
-
240
- $ pubchemcompound = $ this ->GetPCFromFlat ($ a [3 ]);
241
- parent ::addRDF (
242
- parent ::triplify ($ id , parent ::getVoc ()."pubchem-flat-compound-id " , "pubchemcompound: " .$ pubchemcompound )
243
- );
244
- }
245
-
246
- if ($ a [4 ]){
247
- parent ::addRDF (
248
- parent ::triplify ($ id , parent ::getVoc ()."stitch-stereo-compound-id " , "stitch: " .$ a [4 ])
249
- );
250
- $ pubchemcompound = $ this ->GetPCFromStereo ($ a [4 ]);
251
- parent ::addRDF (
252
- parent ::triplify ($ id , parent ::getVoc ()."pubchem-stereo-compound-id " , "pubchemcompound: " .$ pubchemcompound )
253
- );
254
- }
255
143
256
- if ($ a [5 ]){
257
- $ url = str_replace (" " ,"+ " ,$ a [5 ]);
258
- parent ::addRDF (
259
- parent ::QQuadO_URL ($ id , parent ::getVoc ()."pdf-url " , $ url )
260
- );
261
- }
262
-
263
- parent ::setCheckpoint ('record ' );
264
-
265
- }
266
- parent ::setCheckpoint ('file ' );
267
- }
268
-
269
144
function GetPCFromFlat ($ id )
270
145
{
271
146
return ltrim (abs ($ id )-100000000 , "0 " );
@@ -313,12 +188,32 @@ function se()
313
188
);
314
189
$ declared [$ cui ] = '' ;
315
190
}
316
-
191
+ if (!isset ($ declared [$ stitch_flat ])) {
192
+ $ pubchem_id = "pubchem.compound: " .ltrim ( substr ($ stitch_flat ,4 ), "0 " );
193
+ $ stereo_id = "pubchem.compound: " .ltrim ( substr ($ stitch_stereo ,4 ), "0 " );
194
+ parent ::addRDF (
195
+ parent ::triplify ($ stitch_flat , "rdf:type " , parent ::getVoc ()."Flat-Compound " ).
196
+ parent ::describeClass (parent ::getVoc ()."Flat-Compound " , "Flat compound " ).
197
+ parent ::triplify ($ stitch_flat , parent ::getVoc ()."x-pubchem.compound " , $ pubchem_id ).
198
+ parent ::triplify ($ stitch_flat , parent ::getVoc ()."stitch-stereo " , $ stitch_stereo )
199
+ );
200
+ $ declared [$ stitch_flat ] = '' ;
201
+ }
202
+ if (!isset ($ declared [$ stitch_stereo ])) {
203
+ $ pubchem_id = "pubchem.compound: " .ltrim ( substr ($ stitch_stereo ,4 ), "0 " );
204
+ parent ::addRDF (
205
+ parent ::triplify ($ stitch_stereo , "rdf:type " , parent ::getVoc ()."Stereo-Compound " ).
206
+ parent ::describeClass (parent ::getVoc ()."Stereo-Compound " , "Stereo compound " ).
207
+ parent ::triplify ($ stitch_stereo , parent ::getVoc ()."x-pubchem.compound " , $ pubchem_id ).
208
+ parent ::triplify ($ stitch_stereo , parent ::getVoc ()."stitch-flat " , $ stitch_flat )
209
+ );
210
+ $ declared [$ stitch_stereo ] = '' ;
211
+ }
212
+
317
213
parent ::addRDF (
318
- parent ::describeIndividual ($ id , "$ stitch_flat $ cui_label side effect " , parent ::getVoc ()."Drug-Side-Effect " ).
319
- parent ::triplify ($ id , parent ::getVoc ()."side-effect " , $ cui ).
320
- parent ::triplify ($ id , parent ::getVoc ()."stitch-flat " , $ stitch_flat ).
321
- parent ::triplify ($ id , parent ::getVoc ()."stitch-stereo " , $ stitch_stereo )
214
+ parent ::describeIndividual ($ id , "$ stitch_flat $ cui_label effect " , parent ::getVoc ()."Drug-Effect-Association " ).
215
+ parent ::triplify ($ id , parent ::getVoc ()."effect " , $ cui ).
216
+ parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_flat )
322
217
);
323
218
parent ::setCheckpoint ('record ' );
324
219
}
@@ -343,7 +238,6 @@ function indications()
343
238
$ list [$ id ] = '' ;
344
239
}
345
240
346
-
347
241
$ stitch_id = "stitch: $ stitch_flat " ;
348
242
$ meddra_id = "meddra: $ cui " ;
349
243
@@ -353,6 +247,15 @@ function indications()
353
247
);
354
248
$ declared [$ cui ] = '' ;
355
249
}
250
+ if (!isset ($ declared [$ stitch_flat ])) {
251
+ $ pubchem_id = "pubchem.compound: " .ltrim ( substr ($ stitch_flat ,4 ), "0 " );
252
+ parent ::addRDF (
253
+ parent ::triplify ($ stitch_id , "rdf:type " , parent ::getVoc ()."Flat-Compound " ).
254
+ parent ::describeClass (parent ::getVoc ()."Flat-Compound " , "STITCH Flat compound " ).
255
+ parent ::triplify ($ stitch_id , parent ::getVoc ()."x-flat-pubchem.compound " , $ pubchem_id )
256
+ );
257
+ $ declared [$ stitch_flat ] = '' ;
258
+ }
356
259
357
260
parent ::addRDF (
358
261
parent ::describeIndividual ($ id , $ stitch_id ." - " .$ meddra_id ." indication " , parent ::getVoc ()."Drug-Indication-Association " ).
@@ -394,103 +297,74 @@ function freq()
394
297
$ i = 1 ;
395
298
parent ::setCheckpoint ('file ' );
396
299
while ($ l = parent ::getReadFile ()->read ()) {
397
- parent ::setCheckpoint ('record ' );
398
300
$ a = explode ("\t" ,str_replace ("% " ,"" ,$ l ));
399
301
if (count ($ a ) != $ cols ) {
400
302
trigger_error ("Expecting $ cols, but found " .count ($ a )." instead... skipping file! " , E_USER_ERROR );
401
303
return false ;
402
304
}
403
- list ($ stitch_flat , $ stitch_stereo , $ cui , $ placebo , $ freq , $ freq_lower , $ freq_upper , $ concept_type , $ meddra_concept_id , $ meddra_concept_label );
404
- $ id = "stitch_resource: " .md5 ("se_freq " .$ l );
405
- $ label = "side effect frequency of $ meddra_concept_label for $ stitch_id " ;
305
+ list ($ stitch_flat , $ stitch_stereo , $ cui , $ placebo , $ freq , $ freq_lower , $ freq_upper , $ concept_type , $ meddra_concept_id , $ meddra_concept_label ) = $ a ;
306
+ if ($ concept_type == "LLT " ) continue ;
307
+ $ meddra_concept_label = trim ($ meddra_concept_label );
308
+
309
+ $ id = "stitch_resource: " .md5 ("se_freq " .$ l );
310
+ $ stitch_flat = "stitch: $ stitch_flat " ;
311
+ $ label = "$ meddra_concept_label frequency for $ stitch_flat " ;
406
312
parent ::addRDF (
407
313
parent ::describeIndividual ($ id , $ label , parent ::getVoc ()."Drug-Effect-Frequency " ).
408
314
parent ::describeClass (parent ::getVoc ()."Drug-Effect-Frequency " ,"SIDER Drug-Effect and Frequency " ).
409
315
parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_flat ).
410
316
parent ::triplify ($ id , parent ::getVoc ()."effect " , "meddra: " .$ meddra_concept_id )
411
317
);
412
318
413
- if ($ a [ 5 ] ){
319
+ if ($ placebo ){
414
320
parent ::addRDF (
415
321
parent ::triplifyString ($ id , parent ::getVoc ()."placebo " , "true " , "xsd:boolean " )
416
322
);
417
323
}
418
324
419
- $ number = false ;
420
- if (is_numeric ($ freq )) {
421
- $ flabel = $ freq ."% " ;
422
- $ ftype_label = "Exact-Frequency " ;
423
- $ ftype = parent ::getVoc ().$ ftype_label ;
424
- $ number = true ;
425
- } else {
426
- $ flabel = $ freq ;
427
- $ ftype_label = "Qualitative-Frequency " ;
428
- $ ftype = parent ::getVoc ()."$ ftype_label;
429
- }
430
- if( $ freq_lower != $ freq_upper) {
431
- $ flabel .= " ($ freq_lower -$ freq_upper )";
432
- $ ftype_label = "Range-Frequency " ;
433
- $ ftype = parent ::getVoc ().$ ftype_label ;
434
- }
325
+ $ number = false ;
326
+ if (is_numeric ($ freq )) {
327
+ $ flabel = $ freq ."% " ;
328
+ $ ftype_label = "Exact-Frequency " ;
329
+ $ ftype = parent ::getVoc ().$ ftype_label ;
330
+ $ number = true ;
331
+ } else {
332
+ $ flabel = $ freq ;
333
+ $ ftype_label = "Qualitative-Frequency " ;
334
+ $ ftype = parent ::getVoc ()."$ ftype_label" ;
335
+ }
336
+ if ($ freq_lower != $ freq_upper ) {
337
+ $ flabel .= "( $ freq_lower- $ freq_upper) " ;
338
+ $ ftype_label = "Range-Frequency " ;
339
+ $ ftype = parent ::getVoc ().$ ftype_label ;
340
+ }
435
341
342
+ $ fid = $ id .md5 ($ a [5 ].$ a [6 ].$ a [8 ]);
343
+ parent ::addRDF (
344
+ parent ::triplify ($ id ,parent ::getVoc ()."frequency " ,$ fid ).
345
+ parent ::describeIndividual ($ fid ,$ flabel ,$ ftype ).
346
+ parent ::describeClass ($ ftype , $ ftype_label )
347
+ );
348
+
349
+ if ($ number == true ) {
436
350
parent ::addRDF (
437
- parent ::triplify ($ id ,parent ::getVoc ()."AQualitative-Frequency " ,$ fid ).
438
- parent ::describeIndividual ($ fid ,$ flabel ,$ ftype ).
439
- parent ::describeClass ($ ftype , $ ftype_label )
351
+ parent ::triplifyString ($ fid , parent ::getVoc ()."frequency-value " , $ freq /100 )
440
352
);
441
-
442
- if ($ number == true ) {
443
- parent ::addRDF (
444
- parent ::triplifyString ($ fid , parent ::getVoc ()."frequency " , $ a [6 ]/100 )
445
- );
446
- } else {
447
- parent ::addRDF (
448
- parent ::triplifyString ($ fid , parent ::getVoc ()."frequency " , $ a [6 ])
449
- );
450
- }
451
- // if($a[7] != $a[8]){
452
- parent ::addRDF (
453
- parent ::triplifyString ($ fid , parent ::getVoc ()."lower-frequency " , $ a [7 ]).
454
- parent ::triplifyString ($ fid , parent ::getVoc ()."upper-frequency " , $ a [8 ])
455
- );
456
- // }
457
-
458
- $ meddra_id = "umls: $ a [10 ]" ;
459
- $ label = "" ;
460
- if (trim ($ a [11 ])) $ label = strtolower (trim ($ a [11 ]));
461
- $ rel = "preferred-term " ;
462
- if ($ a [9 ] != "LLT " ) $ rel = "lower-level-term " ;
463
-
353
+ } else {
464
354
parent ::addRDF (
465
- parent ::triplify ($ fid , parent ::getVoc ().$ rel , $ meddra_id ).
466
- parent ::describeClass ($ meddra_id ,$ label )
355
+ parent ::triplifyString ($ fid , parent ::getVoc ()."frequency-value " , $ freq )
467
356
);
357
+ }
358
+ parent ::addRDF (
359
+ parent ::triplifyString ($ fid , parent ::getVoc ()."lower-frequency " , sprintf ("%.3f " ,$ freq_lower )).
360
+ parent ::triplifyString ($ fid , parent ::getVoc ()."upper-frequency " , sprintf ("%.3f " ,$ freq_upper ))
361
+ );
468
362
469
363
parent ::setCheckpoint ('record ' );
470
364
}
471
365
parent ::setCheckpoint ('file ' );
472
366
473
367
}
474
-
475
- /*
476
- meddra_adverse_effects.tsv.gz
477
- -----------------------------
478
368
479
- 1 & 2: STITCH compound ids (flat/stereo, see above)
480
- 3: UMLS concept id as it was found on the label
481
- 4: drug name
482
- 5: side effect name
483
- 6: MedDRA concept type (LLT = lowest level term, PT = preferred term)
484
- 7: UMLS concept id for MedDRA term
485
- 8: MedDRA side effect name
486
-
487
- All side effects found on the labels are given as LLT. Additionally, the PT is shown. There is at least one
488
- PT for every side effect, but sometimes the PT is the same as the LLT.
489
- */
490
- // @TODO
491
- function meddra_adverse_effects ()
492
- {
493
-
494
- }
495
369
}
496
370
?>
0 commit comments