Skip to content

Commit 8a79516

Browse files
complete for sider4
1 parent fc4b998 commit 8a79516

File tree

1 file changed

+74
-200
lines changed

1 file changed

+74
-200
lines changed

sider/sider.php

+74 -200
Original file line number | Diff line number | Diff line change
@@ -108,8 +108,6 @@ function run() {
108108
->setLicense("http://creativecommons.org/licenses/by-nc-sa/3.0/")
109109
->setDataset("http://identifiers.org/sider.effect/");
110110

111-
if($file == "label_mapping") $source_file->setLicense("http://creativecommons.org/publicdomain/zero/1.0/");
112-
113111
$prefix = parent::getPrefix();
114112
$bVersion = parent::getParameterValue('bio2rdf_release');
115113
$date = parent::getDate(filemtime($odir.$ofile));
@@ -141,131 +139,8 @@ function run() {
141139

142140
//reset graph URI to default value
143141
parent::setGraphURI($graph_uri);
144-
145142
}
146-
/*
147-
1 & 2: generic and brand names
148-
149-
3: a marker if the drug could be successfully mapped to STITCH. Possible values:
150-
- [empty field]: success
151-
- combination: two or more drugs were combined
152-
- not found: could not find the name in the database
153-
- mapping conflict: the available names point to two different compounds
154-
- template: a package insert that contains information for a group of related drugs
155-
156-
4 & 5: STITCH compound ids, based on PubChem. Salt forms and stereo-isomers have been merged.
157-
Column 4: "flat compound", i.e. stereo-isomers have been merged into one compound
158-
Column 5: stereo-specific compound id
159-
160-
To get the PubChem Compound Ids: take absolute value, for flat compounds ids: subtract 100000000
161-
E.g. aspirin: -100002244 --> 2244
162-
163-
6: URL of the downloaded PDF. This column is empty for FDA SPL labels, which are available in XML.
164-
Unfortunately, many links have become stale since the labels were downloaded in 2009.
165-
166-
7: label identifier
167-
*/
168-
function label_mapping()
169-
{
170-
parent::setCheckpoint('file');
171-
172-
$declared = null;
173-
while($l = parent::getReadFile()->Read(1000000)) {
174-
parent::setCheckpoint('record');
175-
176-
$a = explode("\t",$l);
177-
$id = parent::getNamespace().urlencode(trim($a[6]));
178-
179-
$gnames_list = explode(";",strtolower(trim($a[1])));
180-
array_unique($gnames_list);
181-
asort($gnames_list);
182-
$gnames = implode(" + ",$gnames_list);
183-
if($a[2] == "combination") {
184-
$label = "combination: $gnames";
185-
$type = "Combination-Drug";
186-
} else {
187-
if($a[0]) $label .= $a[0]." (".$gnames.")";
188-
else $label = $gnames;
189-
$type = "Drug";
190-
}
191-
192-
parent::addRDF(
193-
parent::describeIndividual($id, $label, parent::getVoc().$type).
194-
parent::describeClass(parent::getVoc().$type,"SIDER ".$type)
195-
);
196-
197-
// attempt to extract the spl id
198-
$b = explode("_",trim($a[6]));
199-
if(isset($b[1])) {
200-
$c = explode("-",$b[1]);
201-
if(count($c) == 5) {
202-
// possibly an SPL id
203-
parent::addRDF(parent::triplify($id,parent::getVoc()."x-spl","dailymed:".$b[1]));
204-
}
205-
}
206-
207-
if(trim($a[0])) {
208-
$brand_label = strtolower(trim($a[0]));
209-
$brand_qname = parent::getRes().md5($brand_label);
210-
parent::addRDF(
211-
parent::describeIndividual($brand_qname, $brand_label, parent::getVoc()."Brand-Drug-Name").
212-
parent::describeClass(parent::getVoc()."Brand-Drug-Name","Brand Drug Name").
213-
parent::triplify($id, parent::getVoc()."brand-name", $brand_qname)
214-
);
215-
}
216-
if(trim($a[1])) {
217-
foreach($gnames_list AS $generic_name) {
218-
$generic_label = trim($generic_name);
219-
$generic_qname = parent::getRes().md5($generic_label);
220-
parent::addRDF(
221-
parent::describeIndividual($generic_qname, $generic_label, parent::getVoc()."Generic-Drug-Name").
222-
parent::describeClass(parent::getVoc()."Generic-Drug-Name","Generic Drug Name").
223-
parent::triplify($id, parent::getVoc()."generic-name", $generic_qname)
224-
);
225-
}
226-
}
227-
228-
if($a[2]){
229-
$mapping_result = str_replace(" ","-",$a[2]);
230-
parent::addRDF(
231-
parent::triplify($id, parent::getVoc()."mapping-result", parent::getVoc().$mapping_result)
232-
);
233-
}
234-
235-
if($a[3]){
236-
parent::addRDF(
237-
parent::triplify($id, parent::getVoc()."stitch-flat-compound-id", "stitch:".$a[3])
238-
);
239-
240-
$pubchemcompound = $this->GetPCFromFlat($a[3]);
241-
parent::addRDF(
242-
parent::triplify($id, parent::getVoc()."pubchem-flat-compound-id", "pubchemcompound:".$pubchemcompound)
243-
);
244-
}
245-
246-
if($a[4]){
247-
parent::addRDF(
248-
parent::triplify($id, parent::getVoc()."stitch-stereo-compound-id", "stitch:".$a[4])
249-
);
250-
$pubchemcompound = $this->GetPCFromStereo($a[4]);
251-
parent::addRDF(
252-
parent::triplify($id, parent::getVoc()."pubchem-stereo-compound-id", "pubchemcompound:".$pubchemcompound)
253-
);
254-
}
255143

256-
if($a[5]){
257-
$url = str_replace(" ","+",$a[5]);
258-
parent::addRDF(
259-
parent::QQuadO_URL($id, parent::getVoc()."pdf-url", $url)
260-
);
261-
}
262-
263-
parent::setCheckpoint('record');
264-
265-
}
266-
parent::setCheckpoint('file');
267-
}
268-
269144
function GetPCFromFlat($id)
270145
{
271146
return ltrim(abs($id)-100000000, "0");
@@ -313,12 +188,32 @@ function se()
313188
);
314189
$declared[$cui] = '';
315190
}
316-
191+
if(!isset($declared[$stitch_flat])) {
192+
$pubchem_id = "pubchem.compound:".ltrim( substr($stitch_flat,4), "0");
193+
$stereo_id = "pubchem.compound:".ltrim( substr($stitch_stereo,4), "0");
194+
parent::addRDF(
195+
parent::triplify($stitch_flat, "rdf:type", parent::getVoc()."Flat-Compound").
196+
parent::describeClass(parent::getVoc()."Flat-Compound", "Flat compound").
197+
parent::triplify($stitch_flat, parent::getVoc()."x-pubchem.compound", $pubchem_id).
198+
parent::triplify($stitch_flat, parent::getVoc()."stitch-stereo", $stitch_stereo)
199+
);
200+
$declared[$stitch_flat] = '';
201+
}
202+
if(!isset($declared[$stitch_stereo])) {
203+
$pubchem_id = "pubchem.compound:".ltrim( substr($stitch_stereo,4), "0");
204+
parent::addRDF(
205+
parent::triplify($stitch_stereo, "rdf:type", parent::getVoc()."Stereo-Compound").
206+
parent::describeClass(parent::getVoc()."Stereo-Compound", "Stereo compound").
207+
parent::triplify($stitch_stereo, parent::getVoc()."x-pubchem.compound", $pubchem_id).
208+
parent::triplify($stitch_stereo, parent::getVoc()."stitch-flat", $stitch_flat)
209+
);
210+
$declared[$stitch_stereo] = '';
211+
}
212+
317213
parent::addRDF(
318-
parent::describeIndividual($id, "$stitch_flat $cui_label side effect", parent::getVoc()."Drug-Side-Effect").
319-
parent::triplify($id, parent::getVoc()."side-effect", $cui).
320-
parent::triplify($id, parent::getVoc()."stitch-flat", $stitch_flat).
321-
parent::triplify($id, parent::getVoc()."stitch-stereo", $stitch_stereo)
214+
parent::describeIndividual($id, "$stitch_flat $cui_label effect", parent::getVoc()."Drug-Effect-Association").
215+
parent::triplify($id, parent::getVoc()."effect", $cui).
216+
parent::triplify($id, parent::getVoc()."drug", $stitch_flat)
322217
);
323218
parent::setCheckpoint('record');
324219
}
@@ -343,7 +238,6 @@ function indications()
343238
$list[$id] = '';
344239
}
345240

346-
347241
$stitch_id = "stitch:$stitch_flat";
348242
$meddra_id = "meddra:$cui";
349243

@@ -353,6 +247,15 @@ function indications()
353247
);
354248
$declared[$cui] = '';
355249
}
250+
if(!isset($declared[$stitch_flat])) {
251+
$pubchem_id = "pubchem.compound:".ltrim( substr($stitch_flat,4), "0");
252+
parent::addRDF(
253+
parent::triplify($stitch_id, "rdf:type", parent::getVoc()."Flat-Compound").
254+
parent::describeClass(parent::getVoc()."Flat-Compound", "STITCH Flat compound").
255+
parent::triplify($stitch_id, parent::getVoc()."x-flat-pubchem.compound", $pubchem_id)
256+
);
257+
$declared[$stitch_flat] = '';
258+
}
356259

357260
parent::addRDF(
358261
parent::describeIndividual($id, $stitch_id." - ".$meddra_id." indication ", parent::getVoc()."Drug-Indication-Association").
@@ -394,103 +297,74 @@ function freq()
394297
$i = 1;
395298
parent::setCheckpoint('file');
396299
while($l = parent::getReadFile()->read()) {
397-
parent::setCheckpoint('record');
398300
$a = explode("\t",str_replace("%","",$l));
399301
if(count($a) != $cols) {
400302
trigger_error("Expecting $cols, but found ".count($a)." instead... skipping file!", E_USER_ERROR);
401303
return false;
402304
}
403-
list($stitch_flat, $stitch_stereo, $cui, $placebo, $freq, $freq_lower, $freq_upper, $concept_type, $meddra_concept_id, $meddra_concept_label);
404-
$id = "stitch_resource:".md5("se_freq".$l);
405-
$label = "side effect frequency of $meddra_concept_label for $stitch_id";
305+
list($stitch_flat, $stitch_stereo, $cui, $placebo, $freq, $freq_lower, $freq_upper, $concept_type, $meddra_concept_id, $meddra_concept_label) = $a;
306+
if($concept_type == "LLT") continue;
307+
$meddra_concept_label = trim($meddra_concept_label);
308+
309+
$id = "stitch_resource:".md5("se_freq".$l);
310+
$stitch_flat = "stitch:$stitch_flat";
311+
$label = "$meddra_concept_label frequency for $stitch_flat";
406312
parent::addRDF(
407313
parent::describeIndividual($id, $label, parent::getVoc()."Drug-Effect-Frequency").
408314
parent::describeClass(parent::getVoc()."Drug-Effect-Frequency","SIDER Drug-Effect and Frequency").
409315
parent::triplify($id, parent::getVoc()."drug", $stitch_flat).
410316
parent::triplify($id, parent::getVoc()."effect", "meddra:".$meddra_concept_id)
411317
);
412318

413-
if($a[5]){
319+
if($placebo){
414320
parent::addRDF(
415321
parent::triplifyString($id, parent::getVoc()."placebo", "true", "xsd:boolean")
416322
);
417323
}
418324

419-
$number = false;
420-
if(is_numeric($freq)) {
421-
$flabel = $freq."%";
422-
$ftype_label = "Exact-Frequency";
423-
$ftype = parent::getVoc().$ftype_label;
424-
$number = true;
425-
} else {
426-
$flabel = $freq;
427-
$ftype_label = "Qualitative-Frequency";
428-
$ftype = parent::getVoc()."$ftype_label;
429-
}
430-
if($freq_lower != $freq_upper) {
431-
$flabel .= "($freq_lower-$freq_upper)";
432-
$ftype_label = "Range-Frequency";
433-
$ftype = parent::getVoc().$ftype_label;
434-
}
325+
$number = false;
326+
if(is_numeric($freq)) {
327+
$flabel = $freq."%";
328+
$ftype_label = "Exact-Frequency";
329+
$ftype = parent::getVoc().$ftype_label;
330+
$number = true;
331+
} else {
332+
$flabel = $freq;
333+
$ftype_label = "Qualitative-Frequency";
334+
$ftype = parent::getVoc()."$ftype_label";
335+
}
336+
if($freq_lower != $freq_upper) {
337+
$flabel .= "($freq_lower-$freq_upper)";
338+
$ftype_label = "Range-Frequency";
339+
$ftype = parent::getVoc().$ftype_label;
340+
}
435341

342+
$fid = $id.md5($a[5].$a[6].$a[8]);
343+
parent::addRDF(
344+
parent::triplify($id,parent::getVoc()."frequency",$fid).
345+
parent::describeIndividual($fid,$flabel,$ftype).
346+
parent::describeClass($ftype, $ftype_label)
347+
);
348+
349+
if($number == true) {
436350
parent::addRDF(
437-
parent::triplify($id,parent::getVoc()."AQualitative-Frequency",$fid).
438-
parent::describeIndividual($fid,$flabel,$ftype).
439-
parent::describeClass($ftype, $ftype_label)
351+
parent::triplifyString($fid, parent::getVoc()."frequency-value", $freq/100)
440352
);
441-
442-
if($number == true) {
443-
parent::addRDF(
444-
parent::triplifyString($fid, parent::getVoc()."frequency", $a[6]/100)
445-
);
446-
} else {
447-
parent::addRDF(
448-
parent::triplifyString($fid, parent::getVoc()."frequency", $a[6])
449-
);
450-
}
451-
// if($a[7] != $a[8]){
452-
parent::addRDF(
453-
parent::triplifyString($fid, parent::getVoc()."lower-frequency", $a[7]).
454-
parent::triplifyString($fid, parent::getVoc()."upper-frequency", $a[8])
455-
);
456-
// }
457-
458-
$meddra_id = "umls:$a[10]";
459-
$label = "";
460-
if(trim($a[11])) $label = strtolower(trim($a[11]));
461-
$rel = "preferred-term";
462-
if($a[9] != "LLT") $rel = "lower-level-term";
463-
353+
} else {
464354
parent::addRDF(
465-
parent::triplify($fid, parent::getVoc().$rel, $meddra_id).
466-
parent::describeClass($meddra_id,$label)
355+
parent::triplifyString($fid, parent::getVoc()."frequency-value", $freq)
467356
);
357+
}
358+
parent::addRDF(
359+
parent::triplifyString($fid, parent::getVoc()."lower-frequency", sprintf("%.3f",$freq_lower)).
360+
parent::triplifyString($fid, parent::getVoc()."upper-frequency", sprintf("%.3f",$freq_upper))
361+
);
468362

469363
parent::setCheckpoint('record');
470364
}
471365
parent::setCheckpoint('file');
472366

473367
}
474-
475-
/*
476-
meddra_adverse_effects.tsv.gz
477-
-----------------------------
478368

479-
1 & 2: STITCH compound ids (flat/stereo, see above)
480-
3: UMLS concept id as it was found on the label
481-
4: drug name
482-
5: side effect name
483-
6: MedDRA concept type (LLT = lowest level term, PT = preferred term)
484-
7: UMLS concept id for MedDRA term
485-
8: MedDRA side effect name
486-
487-
All side effects found on the labels are given as LLT. Additionally, the PT is shown. There is at least one
488-
PT for every side effect, but sometimes the PT is the same as the LLT.
489-
*/
490-
// @TODO
491-
function meddra_adverse_effects()
492-
{
493-
494-
}
495369
}
496370
?>

0 commit comments

Comments
 (0)