Skip to content

Commit fc4b998

Browse files
partial commit
1 parent 86c22e6 commit fc4b998

File tree

1 file changed

+87
-87
lines changed

1 file changed

+87
-87
lines changed

sider/sider.php

+87-87
Original file line numberDiff line numberDiff line change
@@ -39,28 +39,17 @@ function __construct($argv) {
3939
parent::__construct($argv, "sider");
4040

4141
// set and print application parameters
42-
parent::addParameter('files',true,'all|label_mapping|adverse_effects_raw|indications_raw|meddra_freq_parsed','all','all or comma-separated list of ontology short names to process');
42+
parent::addParameter('files',true,'all|indications|se|freq','all','all or comma-separated list of ontology short names to process');
4343
parent::addParameter('download_url',false,null,'http://sideeffects.embl.de/media/download/');
4444

4545
parent::initialize();
4646
}
4747

4848
function run() {
49-
50-
if(parent::getParameterValue('download') === true)
51-
{
52-
$this->download();
53-
}
54-
if(parent::getParameterValue('process') === true)
55-
{
56-
$this->process();
57-
}
58-
59-
}
60-
61-
function download(){
6249
$idir = parent::getParameterValue('indir');
50+
$odir = parent::getParameterValue('outdir');
6351
$files = parent::getParameterValue('files');
52+
$dataset_description = '';
6453

6554
if($files == 'all') {
6655
$files = explode('|', parent::getParameterList('files'));
@@ -70,8 +59,11 @@ function download(){
7059
}
7160

7261
foreach($files AS $file) {
73-
$lfile = $idir.$file.'.tsv.gz';
74-
$rfile = parent::getParameterValue('download_url').$file.'.tsv.gz';
62+
$f = $file;
63+
if($file != "freq") $f = "all_".$file;
64+
$f = "meddra_".$f.".tsv.gz";
65+
$lfile = $idir.$f;
66+
$rfile = parent::getParameterValue('download_url').$f;
7567
if(!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
7668
echo "downloading $file... ";
7769
$ret = file_get_contents($rfile);
@@ -86,43 +78,15 @@ function download(){
8678
}
8779
echo "done!".PHP_EOL;
8880
}
89-
}//foreach
90-
}
91-
92-
function process(){
93-
94-
$idir = parent::getParameterValue('indir');
95-
$odir = parent::getParameterValue('outdir');
96-
$files = parent::getParameterValue('files');
97-
98-
if($files == 'all') {
99-
$files = explode('|', parent::getParameterList('files'));
100-
array_shift($files);
101-
} else {
102-
$files = explode(',', parent::getParameterValue('files'));
103-
}
104-
105-
parent::setCheckpoint('dataset');
106-
107-
$dataset_description = '';
108-
109-
$graph_uri = parent::getGraphURI();
110-
if(parent::getParameterValue('dataset_graph') == true) parent::setGraphURI(parent::getDatasetURI());
11181

112-
foreach($files AS $file) {
113-
$lfile = $idir.$file.'.tsv.gz';
114-
$rfile = parent::getParameterValue('download_url').$file.'.tsv.gz';
115-
116-
echo "Processing $file... ";
82+
echo "Processing $f... ";
11783
parent::setReadFile($lfile,true);
11884

11985
$suffix = parent::getParameterValue('output_format');
12086
$ofile = "sider-".$file.'.'.$suffix;
12187
$gz = false;
12288

123-
if(strstr(parent::getParameterValue('output_format'), "gz")) {
124-
$gz = true;
125-
}
89+
if(strstr(parent::getParameterValue('output_format'), "gz")) $gz = true;
12690

12791
parent::setWriteFile($odir.$ofile, $gz);
12892
$this->$file();
@@ -320,43 +284,83 @@ function GetPCFromStereo($id)
320284
Format: label identifier, concept id, name of side effect (as found on the label)
321285
*/
322286

323-
function adverse_effects_raw()
287+
function se()
324288
{
325289
$declared = null;
326290

327291
parent::setCheckpoint('file');
328-
while($l = $this->GetReadFile()->Read()) {
292+
while($l = $this->getReadFile()->Read()) {
329293
$a = explode("\t",$l);
330-
$id = "sider:".urlencode($a[0]);
331-
$cui = "umls:".$a[1];
332-
$cui_label= strtolower(trim($a[2]));
294+
if(count($a) != 6) {
295+
trigger_error("Expecting 6 columns, found ".count($a)." instead.", E_USER_ERROR);
296+
exit;
297+
}
298+
$stitch_flat = "stitch:".$a[0];
299+
$stitch_stereo = "stitch:".$a[1];
300+
$cui = "umls:".$a[2];
301+
$term_type = $a[3];
302+
$term_type_cui = $a[4];
303+
$term_type_label = $a[5];
304+
305+
if($term_type == 'LLT') continue;
306+
307+
$id = "sider:".md5("se".$stitch_flat.$cui);
308+
309+
$cui_label= strtolower(trim($term_type_label));
310+
if(!isset($declared[$cui])) {
311+
parent::addRDF(
312+
parent::describeClass($cui, $cui_label)
313+
);
314+
$declared[$cui] = '';
315+
}
316+
333317
parent::addRDF(
334-
parent::describeClass($cui, $cui_label).
335-
parent::triplify($id, parent::getVoc()."side-effect", $cui)
318+
parent::describeIndividual($id, "$stitch_flat $cui_label side effect", parent::getVoc()."Drug-Side-Effect").
319+
parent::triplify($id, parent::getVoc()."side-effect", $cui).
320+
parent::triplify($id, parent::getVoc()."stitch-flat", $stitch_flat).
321+
parent::triplify($id, parent::getVoc()."stitch-stereo", $stitch_stereo)
336322
);
337323
parent::setCheckpoint('record');
338324
}
325+
339326
parent::setCheckpoint('file');
340327
}
341328

342-
function indications_raw()
329+
function indications()
343330
{
344331
$declared = null;
345-
332+
$list = null;
346333
parent::setCheckpoint('file');
347-
while($l = $this->GetReadFile()->Read()) {
334+
while($l = $this->getReadFile()->Read()) {
348335
parent::setCheckpoint('record');
349336

350337
$a = explode("\t",$l);
351-
$id = "sider:".urlencode($a[0]);
352-
$cui = "umls:".$a[1];
353-
$cui_label = strtolower(trim($a[2]));
338+
list($stitch_flat,$cui,$provenance,$cui_label,$term_type,$term_cui,$term_cui_label) = $a;
339+
$id = "sider:".md5("i".$stitch_flat.$cui);
340+
341+
if($term_type == "LLT" or isset($list[$id])) continue;
342+
if(!isset($list[$id])) {
343+
$list[$id] = '';
344+
}
345+
346+
347+
$stitch_id = "stitch:$stitch_flat";
348+
$meddra_id = "meddra:$cui";
349+
350+
if(!isset($declared[$cui])) {
351+
parent::addRDF(
352+
parent::describeClass($meddra_id, $cui_label)
353+
);
354+
$declared[$cui] = '';
355+
}
354356

355357
parent::addRDF(
356-
parent::describeClass($cui, $cui_label).
357-
parent::triplify($id, parent::getVoc()."indication", $cui)
358+
parent::describeIndividual($id, $stitch_id." - ".$meddra_id." indication ", parent::getVoc()."Drug-Indication-Association").
359+
parent::describeClass(parent::getVoc()."Drug-Indication-Association","Drug-Disease Association").
360+
parent::triplify($id, parent::getVoc()."drug", $stitch_id).
361+
parent::triplify($id, parent::getVoc()."indication", $meddra_id).
362+
parent::triplifyString($id, parent::getVoc()."provenance", $provenance)
358363
);
359-
parent::setCheckpoint('record');
360364

361365
}
362366
parent::setCheckpoint('file');
@@ -384,30 +388,26 @@ function indications_raw()
384388
matches the upper bound. Due to the nature of the data, there can be more than one frequency for the same label,
385389
e.g. from different clinical trials or for different levels of severity.
386390
*/
387-
function meddra_freq_parsed()
391+
function freq()
388392
{
389-
$cols = 12;
393+
$cols = 10;
390394
$i = 1;
391395
parent::setCheckpoint('file');
392396
while($l = parent::getReadFile()->read()) {
393397
parent::setCheckpoint('record');
394-
395398
$a = explode("\t",str_replace("%","",$l));
396399
if(count($a) != $cols) {
397-
trigger_error("Expecting $cols, but found ".count($a)." instead... skipping file!");
400+
trigger_error("Expecting $cols, but found ".count($a)." instead... skipping file!", E_USER_ERROR);
398401
return false;
399402
}
400-
$label = $a[2];
401-
$label_id = parent::getNamespace().urlencode($label);
402-
$effect_id = "umls:".$a[3];
403-
404-
$id = parent::getRes().md5($a[2].$a[3].$a[6]);
405-
$label = "$a[4] in $label $a[2]";
403+
list($stitch_flat, $stitch_stereo, $cui, $placebo, $freq, $freq_lower, $freq_upper, $concept_type, $meddra_concept_id, $meddra_concept_label);
404+
$id = "stitch_resource:".md5("se_freq".$l);
405+
$label = "side effect frequency of $meddra_concept_label for $stitch_id";
406406
parent::addRDF(
407-
parent::describeIndividual($id, $label, parent::getVoc()."Drug-Effect").
408-
parent::describeClass(parent::getVoc()."Drug-Effect","SIDER Drug-Effect").
409-
parent::triplify($id, parent::getVoc()."drug", $label_id).
410-
parent::triplify($id, parent::getVoc()."effect", $effect_id)
407+
parent::describeIndividual($id, $label, parent::getVoc()."Drug-Effect-Frequency").
408+
parent::describeClass(parent::getVoc()."Drug-Effect-Frequency","SIDER Drug-Effect and Frequency").
409+
parent::triplify($id, parent::getVoc()."drug", $stitch_flat).
410+
parent::triplify($id, parent::getVoc()."effect", "meddra:".$meddra_concept_id)
411411
);
412412

413413
if($a[5]){
@@ -416,25 +416,25 @@ function meddra_freq_parsed()
416416
);
417417
}
418418

419-
$fid = $id.md5($a[5].$a[6].$a[7].$a[8]);
420-
// $fid = $id.($i++);
421-
$flabel = $a[6];
422-
$ftype = parent::getVoc().ucfirst($a[6])."-Frequency";
423419
$number = false;
424-
if(is_numeric($a[6])) {
425-
$flabel = $a[6]."%";
426-
$ftype_label = "Specified-Frequency";
420+
if(is_numeric($freq)) {
421+
$flabel = $freq."%";
422+
$ftype_label = "Exact-Frequency";
427423
$ftype = parent::getVoc().$ftype_label;
428424
$number = true;
425+
} else {
426+
$flabel = $freq;
427+
$ftype_label = "Qualitative-Frequency";
428+
$ftype = parent::getVoc()."$ftype_label;
429429
}
430-
if($a[7] != $a[8]) {
431-
$flabel .= "($a[7]-$a[8])";
430+
if($freq_lower != $freq_upper) {
431+
$flabel .= "($freq_lower-$freq_upper)";
432432
$ftype_label = "Range-Frequency";
433433
$ftype = parent::getVoc().$ftype_label;
434-
}
434+
}
435435

436436
parent::addRDF(
437-
parent::triplify($id,parent::getVoc()."reported-frequency",$fid).
437+
parent::triplify($id,parent::getVoc()."AQualitative-Frequency",$fid).
438438
parent::describeIndividual($fid,$flabel,$ftype).
439439
parent::describeClass($ftype, $ftype_label)
440440
);

0 commit comments

Comments
 (0)