Skip to content

Commit 103b826

Browse files
id fixes; addition of prevalence parser
1 parent 85d2f7c commit 103b826

File tree

1 file changed

+108
-39
lines changed

1 file changed

+108
-39
lines changed

Diff for: orphanet/orphanet.php

+108-39
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,13 @@ class ORPHANETParser extends Bio2RDFizer
3535
{
3636
private $filemap = array(
3737
'disease' => 'en_product1.xml',
38-
'epi' => 'en_product9_prev.xml',
38+
'prevalence' => 'en_product9_prev.xml',
3939
'phenotypefreq' => 'en_product4.xml',
40-
# 'signs' => 'en_product5.xml',
4140
'genes' => 'en_product6.xml'
4241
);
4342
function __construct($argv) {
4443
parent::__construct($argv, "orphanet");
45-
parent::addParameter('files',true,'all|disease|phenotypefreq|genes','all','all or comma-separated list of ontology short names to process');
44+
parent::addParameter('files',true,'all|disease|genes|phenotypefreq|prevalence','all','all or comma-separated list of ontology short names to process');
4645
parent::addParameter('download_url',false,null,'http://www.orphadata.org/data/xml/');
4746
parent::initialize();
4847
}
@@ -196,52 +195,121 @@ function disease($file)
196195
unset($xml);
197196
}
198197

199-
function epi ($file)
198+
function prevalence ($file)
200199
{
201200
$seen = '';
202201
$xml = new CXML($file);
203202
while($xml->parse("DisorderList") == TRUE) {
204203
$x = $xml->GetXMLRoot();
205204
foreach($x->Disorder AS $d) {
206-
// var_dump($d);exit;
205+
207206
$orphanet_id = parent::getNamespace().((string)$d->OrphaNumber);
208-
if(isset($d->ClassOfPrevalence)) {
209-
$id = parent::getNamespace().((string) $d->ClassOfPrevalence->attributes()->id);
210-
$name = (string) $d->ClassOfPrevalence->Name;
211-
if($name != '' && $name != 'Unknown' && $name != 'No data available') {
212-
if(!isset($seen[$name])) {
213-
$seen[$name] = true;
214-
$a = explode (" / ", $name);
215-
$size = str_replace(" ","",$a[1]);
216-
$upper_bound = $lower_bound = '';
217-
if($a[0][0] == '<') {
218-
$upper_bound = substr($a[0],1) / $size;
219-
} else if($a[0][0] == '>') {
220-
$lower_bound = substr($a[0],1) / $size;
221-
} else {
222-
$b = explode("-",$a[0]);
223-
$lower_bound = $b[0] / $size;
224-
$upper_bound = $b[1] / $size;
225-
}
226-
if($upper_bound) {
207+
$disease_name = (string) $d->Name;
208+
209+
foreach($d->PrevalenceList->Prevalence AS $pl) {
210+
$id = parent::getRes()."pl".((string) $pl->attributes()->id);
211+
parent::addRDF(
212+
parent::describeClass($id,"Prevalence",parent::getVoc()."Prevalence").
213+
parent::describeIndividual($id, "Prevalence for $disease_name", parent::getVoc()."Prevalence")
214+
);
215+
$type_id = parent::getRes()."pt".(string) $pl->PrevalenceType->attributes()->id;
216+
$type_label = (string) $pl->PrevalenceType->Name;
217+
if($type_label != "") {
218+
parent::addRDF(
219+
parent::describeIndividual($type_id, $type_label, parent::getVoc()."Prevalence-Type").
220+
parent::triplify($id, parent::getVoc()."prevalence-type", $type_id).
221+
parent::triplify($orphanet_id, parent::getVoc()."prevalence", $id)
222+
);
223+
}
224+
225+
$qual_id = parent::getRes()."qu".(string) $pl->PrevalenceQualification->attributes()->id;
226+
$qual_label = (string) $pl->PrevalenceQualification->Name;
227+
if($qual_label != "") {
228+
parent::addRDF(
229+
parent::describeIndividual($qual_id, $qual_label, parent::getVoc()."Prevalence-Qualification").
230+
parent::triplify($id, parent::getVoc()."prevalence-qualification", $qual_id)
231+
);
232+
}
233+
234+
$prev_id = parent::getRes()."pr".(string) $pl->PrevalenceClass->attributes()->id;
235+
$prev_label = (string) $pl->PrevalenceClass->Name;
236+
if($prev_label != "") {
237+
parent::addRDF(
238+
parent::describeIndividual($prev_id, $prev_label, parent::getVoc()."Prevalence-Value").
239+
parent::triplify($id, parent::getVoc()."prevalence-value", $prev_id)
240+
);
241+
}
242+
243+
$geo_id = parent::getRes()."geo".(string) $pl->PrevalenceGeographic->attributes()->id;
244+
$geo_label = (string) $pl->PrevalenceGeographic->Name;
245+
if($geo_label != "") {
246+
parent::addRDF(
247+
parent::describeIndividual($geo_id, $geo_label, parent::getVoc()."Geographic-Prevalence").
248+
parent::triplify($id, parent::getVoc()."prevalence-geo", $geo_id)
249+
);
250+
}
251+
252+
$val_id = parent::getRes()."val".(string) $pl->PrevalenceValidationStatus->attributes()->id;
253+
$val_label = (string) $pl->PrevalenceValidationStatus->Name;
254+
if($val_label != "") {
255+
parent::addRDF(
256+
parent::describeIndividual($val_id, $val_label, parent::getVoc()."Prevalence-Validation-Status").
257+
parent::triplify($id, parent::getVoc()."prevalence-status", $val_id)
258+
);
259+
}
260+
$valmoy = (string) $pl->ValMoy;
261+
if($valmoy != "") {
262+
parent::addRDF(
263+
parent::triplifyString($id, parent::getVoc()."val-moy", $valmoy)
264+
);
265+
}
266+
267+
268+
$source = trim((string) $pl->Source);
269+
if($source and (strlen($source) != 0)) {
270+
//23712425[PMID]
271+
preg_match_all("/([0-9]*)\[([^\]]*)?\]/",$source, $m, PREG_SET_ORDER );
272+
foreach($m AS $i) {
273+
if(isset($i[2]) and ($i[2] == "PMID")) {
274+
$source_id = "PMID:".$i[1];
227275
parent::addRDF(
228-
parent::triplifyString($id,parent::getVoc()."upper-bound",$upper_bound, "xsd:float")
276+
parent::triplify($id, parent::getVoc()."source", $source_id)
229277
);
230-
}
231-
if($lower_bound) {
278+
} else {
232279
parent::addRDF(
233-
parent::triplifyString($id,parent::getVoc()."lower-bound",$lower_bound, "xsd:float")
280+
parent::triplifyString($id, parent::getVoc()."source", $i[0])
234281
);
235282
}
236-
}
237-
parent::addRDF(
238-
parent::triplify($orphanet_id, parent::getVoc()."prevalence", $id).
239-
parent::describeClass($id,$name,parent::getVoc()."Prevalence")
240-
);
241283

242-
//echo parent::getRDF();exit;
284+
}
285+
243286
}
244287
}
288+
parent::writeRDFBufferToWriteFile();
289+
}
290+
}
291+
unset($xml);
292+
}
293+
294+
function onset ($file)
295+
{
296+
$seen = '';
297+
$xml = new CXML($file);
298+
while($xml->parse("DisorderList") == TRUE) {
299+
$x = $xml->GetXMLRoot();
300+
foreach($x->Disorder AS $d) {
301+
// var_dump($d);exit;
302+
$orphanet_id = parent::getNamespace().((string)$d->OrphaNumber);
303+
$disease_name = (string) $d->Name;
304+
foreach($d->PrevalanceList AS $pl) {
305+
$id = parent::getNamespace().((string) $pl->attributes()->id);
306+
307+
parent::addRDF(
308+
parent::triplify($orphanet_id, parent::getVoc()."prevalence", $id).
309+
parent::describeClass($id,$name,parent::getVoc()."Prevalence")
310+
);
311+
312+
}
245313
if(isset($d->AverageAgeofOnset)) {
246314
$id = parent::getNamespace().((string) $d->AverageAgeOfOnset->attributes()->id);
247315
$name = (string) $d->AverageAgeOfOnset->Name;
@@ -281,7 +349,8 @@ function epi ($file)
281349
}
282350
unset($xml);
283351
}
284-
352+
353+
285354
function phenotypefreq($file)
286355
{
287356
/*
@@ -318,11 +387,11 @@ function phenotypefreq($file)
318387
$orphanet_id = parent::getNamespace().((string)$d->OrphaNumber);
319388
$disease_name = ((string)$d->Name);
320389
foreach($d->HPODisorderAssociationList->HPODisorderAssociation AS $ds) {
321-
$sfid = parent::getNamespace().((string)$ds->attributes()->id);
390+
$sfid = parent::getRes()."sf".((string)$ds->attributes()->id);
322391
$s = (string) $ds->HPO->HPOTerm;
323392
$sid = $ds->HPO->HPOId;
324393
$f = (string) $ds->HPOFrequency->Name;
325-
$fid = parent::getRes().((string) $ds->HPOFrequency->attributes()->id);
394+
$fid = parent::getRes()."f".((string) $ds->HPOFrequency->attributes()->id);
326395

327396
$diagnostic = false;
328397
if($ds->DiagnosticCriteria->Name) {
@@ -450,11 +519,11 @@ function genes($file)
450519

451520
$dga_id = parent::getRes().((string)$d->OrphaNumber)."_".md5($dga->asXML());
452521
$ga = $dga->DisorderGeneAssociationType;
453-
$ga_id = parent::getNamespace().((string) $ga->attributes()->id);
522+
$ga_id = parent::getRes()."ga".((string) $ga->attributes()->id);
454523
$ga_label = (string) $ga->Name;
455524

456525
$s = $dga->DisorderGeneAssociationStatus;
457-
$s_id = parent::getNamespace().((string) $s->attributes()->id);
526+
$s_id = parent::getRes()."st".((string) $s->attributes()->id);
458527
$s_label = (string) $s->Name;
459528

460529
parent::addRDF(

0 commit comments

Comments
 (0)