@@ -35,14 +35,13 @@ class ORPHANETParser extends Bio2RDFizer
35
35
{
36
36
/**
 * Short dataset name => Orphadata XML file name.
 *
 * NOTE(review): the keys presumably double as the names of the parser
 * methods of this class (disease, prevalence, phenotypefreq, genes) —
 * confirm against the dispatch code, which is not visible here.
 */
private $filemap = array(
    'disease'       => 'en_product1.xml',
    'prevalence'    => 'en_product9_prev.xml',
    'phenotypefreq' => 'en_product4.xml',
    'genes'         => 'en_product6.xml'
);
43
42
/**
 * Sets up the parser under the "orphanet" prefix and registers its two
 * command-line parameters before handing control to the parent initializer.
 *
 * @param array $argv arguments forwarded unchanged to the parent constructor
 */
function __construct($argv) {
    parent::__construct($argv, "orphanet");

    // 'files': which datasets to process; the default is 'all'.
    $selectable = 'all|disease|genes|phenotypefreq|prevalence';
    $files_help = 'all or comma-separated list of ontology short names to process';
    parent::addParameter('files', true, $selectable, 'all', $files_help);

    // 'download_url': base location of the Orphadata XML products.
    $default_url = 'http://www.orphadata.org/data/xml/';
    parent::addParameter('download_url', false, null, $default_url);

    parent::initialize();
}
@@ -196,52 +195,121 @@ function disease($file)
196
195
unset($ xml );
197
196
}
198
197
199
/**
 * Converts the Orphadata prevalence product (en_product9_prev.xml) to RDF.
 *
 * For every <Disorder> each <Prevalence> record becomes a resource that is
 * linked from the disorder and annotated with its type, qualification,
 * class, geographic area, validation status, mean value (ValMoy) and
 * sources. The RDF buffer is flushed once per disorder.
 *
 * @param string $file path of the XML file handed to CXML
 */
function prevalence($file)
{
    // One row per optional child element of <Prevalence>:
    // XML element name, resource-id prefix, vocabulary type, predicate.
    $facets = array(
        array('PrevalenceType',             'pt',  'Prevalence-Type',              'prevalence-type'),
        array('PrevalenceQualification',    'qu',  'Prevalence-Qualification',     'prevalence-qualification'),
        array('PrevalenceClass',            'pr',  'Prevalence-Value',             'prevalence-value'),
        array('PrevalenceGeographic',       'geo', 'Geographic-Prevalence',        'prevalence-geo'),
        array('PrevalenceValidationStatus', 'val', 'Prevalence-Validation-Status', 'prevalence-status'),
    );

    $xml = new CXML($file);
    while ($xml->parse("DisorderList") == TRUE) {
        $x = $xml->GetXMLRoot();
        foreach ($x->Disorder AS $d) {
            $orphanet_id  = parent::getNamespace().((string) $d->OrphaNumber);
            $disease_name = (string) $d->Name;

            foreach ($d->PrevalenceList->Prevalence AS $pl) {
                $id = parent::getRes()."pl".((string) $pl->attributes()->id);

                // The disorder -> record link is emitted unconditionally.
                // Previously it sat inside the "type label is not empty"
                // guard, so a record without a PrevalenceType name was
                // never attached to its disorder.
                // NOTE(review): the record is described both as a class and
                // as an individual, as in the original — confirm intended.
                parent::addRDF(
                    parent::describeClass($id, "Prevalence", parent::getVoc()."Prevalence").
                    parent::describeIndividual($id, "Prevalence for $disease_name", parent::getVoc()."Prevalence").
                    parent::triplify($orphanet_id, parent::getVoc()."prevalence", $id)
                );

                foreach ($facets AS $facet) {
                    list($element, $prefix, $type, $predicate) = $facet;
                    $node  = $pl->{$element};
                    $label = (string) $node->Name;
                    if ($label == "") {
                        // Element absent or unnamed: nothing to describe, and
                        // its id attribute is not read at all.
                        continue;
                    }
                    $facet_id = parent::getRes().$prefix.((string) $node->attributes()->id);
                    parent::addRDF(
                        parent::describeIndividual($facet_id, $label, parent::getVoc().$type).
                        parent::triplify($id, parent::getVoc().$predicate, $facet_id)
                    );
                }

                $valmoy = (string) $pl->ValMoy;
                if ($valmoy != "") {
                    parent::addRDF(
                        parent::triplifyString($id, parent::getVoc()."val-moy", $valmoy)
                    );
                }

                // Sources look like 23712425[PMID]; PMID entries become
                // PMID: resources, any other bracketed tag is kept as a literal.
                $source = trim((string) $pl->Source);
                if ($source !== '') {
                    preg_match_all("/([0-9]*)\[([^\]]*)?\]/", $source, $m, PREG_SET_ORDER);
                    foreach ($m AS $i) {
                        if (isset($i[2]) and ($i[2] == "PMID")) {
                            $source_id = "PMID:".$i[1];
                            parent::addRDF(
                                parent::triplify($id, parent::getVoc()."source", $source_id)
                            );
                        } else {
                            parent::addRDF(
                                parent::triplifyString($id, parent::getVoc()."source", $i[0])
                            );
                        }
                    }
                }
            }
            parent::writeRDFBufferToWriteFile();
        }
    }
    unset($xml);
}
293
+
294
+ function onset ($ file )
295
+ {
296
+ $ seen = '' ;
297
+ $ xml = new CXML ($ file );
298
+ while ($ xml ->parse ("DisorderList " ) == TRUE ) {
299
+ $ x = $ xml ->GetXMLRoot ();
300
+ foreach ($ x ->Disorder AS $ d ) {
301
+ // var_dump($d);exit;
302
+ $ orphanet_id = parent ::getNamespace ().((string )$ d ->OrphaNumber );
303
+ $ disease_name = (string ) $ d ->Name ;
304
+ foreach ($ d ->PrevalanceList AS $ pl ) {
305
+ $ id = parent ::getNamespace ().((string ) $ pl ->attributes ()->id );
306
+
307
+ parent ::addRDF (
308
+ parent ::triplify ($ orphanet_id , parent ::getVoc ()."prevalence " , $ id ).
309
+ parent ::describeClass ($ id ,$ name ,parent ::getVoc ()."Prevalence " )
310
+ );
311
+
312
+ }
245
313
if (isset ($ d ->AverageAgeofOnset )) {
246
314
$ id = parent ::getNamespace ().((string ) $ d ->AverageAgeOfOnset ->attributes ()->id );
247
315
$ name = (string ) $ d ->AverageAgeOfOnset ->Name ;
@@ -281,7 +349,8 @@ function epi ($file)
281
349
}
282
350
unset($ xml );
283
351
}
284
-
352
+
353
+
285
354
function phenotypefreq ($ file )
286
355
{
287
356
/*
@@ -318,11 +387,11 @@ function phenotypefreq($file)
318
387
$ orphanet_id = parent ::getNamespace ().((string )$ d ->OrphaNumber );
319
388
$ disease_name = ((string )$ d ->Name );
320
389
foreach ($ d ->HPODisorderAssociationList ->HPODisorderAssociation AS $ ds ) {
321
- $ sfid = parent ::getNamespace () .((string )$ ds ->attributes ()->id );
390
+ $ sfid = parent ::getRes (). " sf " .((string )$ ds ->attributes ()->id );
322
391
$ s = (string ) $ ds ->HPO ->HPOTerm ;
323
392
$ sid = $ ds ->HPO ->HPOId ;
324
393
$ f = (string ) $ ds ->HPOFrequency ->Name ;
325
- $ fid = parent ::getRes ().((string ) $ ds ->HPOFrequency ->attributes ()->id );
394
+ $ fid = parent ::getRes ()." f " . ((string ) $ ds ->HPOFrequency ->attributes ()->id );
326
395
327
396
$ diagnostic = false ;
328
397
if ($ ds ->DiagnosticCriteria ->Name ) {
@@ -450,11 +519,11 @@ function genes($file)
450
519
451
520
$ dga_id = parent ::getRes ().((string )$ d ->OrphaNumber )."_ " .md5 ($ dga ->asXML ());
452
521
$ ga = $ dga ->DisorderGeneAssociationType ;
453
- $ ga_id = parent ::getNamespace () .((string ) $ ga ->attributes ()->id );
522
+ $ ga_id = parent ::getRes (). " ga " .((string ) $ ga ->attributes ()->id );
454
523
$ ga_label = (string ) $ ga ->Name ;
455
524
456
525
$ s = $ dga ->DisorderGeneAssociationStatus ;
457
- $ s_id = parent ::getNamespace () .((string ) $ s ->attributes ()->id );
526
+ $ s_id = parent ::getRes (). " st " .((string ) $ s ->attributes ()->id );
458
527
$ s_label = (string ) $ s ->Name ;
459
528
460
529
parent ::addRDF (
0 commit comments