Skip to content

Commit d02bfcc

Browse files
Merge pull request #468 from bio2rdf/dailymed
Dailymed
2 parents fee1f36 + b15af72 commit d02bfcc

File tree

1 file changed

+297
-0
lines changed

1 file changed

+297
-0
lines changed

dailymed/dailymed.php

+297
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
<?php
2+
/**
3+
Copyright (C) 2013 Michel Dumontier
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy of
6+
this software and associated documentation files (the "Software"), to deal in
7+
the Software without restriction, including without limitation the rights to
8+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
9+
of the Software, and to permit persons to whom the Software is furnished to do
10+
so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
22+
*/
23+
24+
25+
/**
26+
* An RDF generator for DailyMed
27+
* documentation: https://dailymed.nlm.nih.gov/
28+
* @version 1.0
29+
* @author Michel Dumontier
30+
*/
31+
32+
require_once(__DIR__.'/../../php-lib/bio2rdfapi.php');
33+
34+
class DailymedParser extends Bio2RDFizer
{
    /**
     * Release archives to fetch for each short name accepted by the
     * "files" parameter. Every key must also be the name of a method on
     * this class, because run() dispatches to $this->{key}() for each
     * XML document it extracts.
     */
    public $filemap = array(
        "prescription" => array(
            "dm_spl_release_human_rx_part1.zip",
            "dm_spl_release_human_rx_part2.zip",
            "dm_spl_release_human_rx_part3.zip",
            "dm_spl_release_human_rx_part4.zip"
        ),
        "otc" => array(
            "dm_spl_release_human_otc_part1.zip",
            "dm_spl_release_human_otc_part2.zip",
            "dm_spl_release_human_otc_part3.zip",
            "dm_spl_release_human_otc_part4.zip",
            "dm_spl_release_human_otc_part5.zip",
            "dm_spl_release_human_otc_part6.zip",
            "dm_spl_release_human_otc_part7.zip"
        )
    );

    /**
     * Registers the dailymed-specific parameters and initializes the parser.
     *
     * @param array $argv command line arguments, forwarded to the parent constructor
     */
    public function __construct($argv)
    {
        parent::__construct($argv, "dailymed");
        parent::addParameter('files', true, 'all|prescription|otc', 'all', 'all or comma-separated list of short names to process');
        parent::addParameter('download_url', false, null, 'ftp://public.nlm.nih.gov/nlmdata/.dailymed/');
        parent::initialize();
    }

    /**
     * Downloads (when needed) and converts every release archive of the
     * requested file types, writing one output file and one dataset
     * description per release archive.
     *
     * Each release archive is a zip of per-label zip files; every per-label
     * zip is extracted into "<indir>tmp/" and its XML documents are handed
     * to the method named after the file type.
     */
    public function run()
    {
        $dd = '';
        $ldir = parent::getParameterValue('indir');
        $odir = parent::getParameterValue('outdir');
        $tdir = $ldir."tmp/";
        if (!is_dir($tdir) && !@mkdir($tdir, 0777, true)) {
            trigger_error("Unable to create temporary directory $tdir", E_USER_WARNING);
        }

        $files = parent::getParameterValue('files');
        if ($files == 'all') {
            $files = explode('|', parent::getParameterList('files'));
            array_shift($files); // drop the leading 'all' entry
        } else {
            $files = explode(',', $files);
        }

        foreach ($files as $filetype) {
            $filetype = trim($filetype);
            // The short name is used both as a filemap key and as a method
            // name, so reject anything that is not a known key.
            if (!isset($this->filemap[$filetype]) || !method_exists($this, $filetype)) {
                trigger_error("Unknown file type '$filetype' ... skipping", E_USER_WARNING);
                continue;
            }
            echo "processing $filetype ...";

            // download files; remember which ones are usable so that a
            // failed download is really skipped in the processing loop
            $available = array();
            foreach ($this->filemap[$filetype] as $file) {
                $lfile = $ldir.$file;
                $rfile = parent::getParameterValue('download_url').$file;
                if (!file_exists($lfile) || parent::getParameterValue('download') == 'true') {
                    $ret = utils::downloadSingle($rfile, $lfile);
                    if ($ret === false) {
                        echo "unable to download $file ... skipping".PHP_EOL;
                        continue;
                    }
                }
                $available[] = $file;
            }

            // process files
            foreach ($available as $file) {
                $lfile = $ldir.$file;
                $rfile = parent::getParameterValue('download_url').$file;

                $zin1 = new ZipArchive();
                // ZipArchive::open() returns TRUE or an integer error code,
                // never FALSE, so success must be tested with !== true.
                if ($zin1->open($lfile) !== true) {
                    trigger_error("Unable to open $lfile", E_USER_ERROR);
                    exit;
                }

                $suffix = parent::getParameterValue('output_format');
                $ofile = "dailymed-".substr($file, 0, -4).'.'.$suffix;
                $gz = (strstr($suffix, "gz") !== false);
                parent::setWriteFile($odir.$ofile, $gz);

                $entries = $zin1->count();
                for ($i = 0; $i < $entries; $i++) {
                    $entry = $zin1->getNameIndex($i);
                    // skip unreadable names and directory entries, which
                    // cannot be opened as a nested zip archive
                    if ($entry === false || substr($entry, -1) === '/') {
                        continue;
                    }
                    echo "processing $entry".PHP_EOL;
                    $this->processDailymedEntry($lfile, $entry, $tdir, $filetype);
                }
                $zin1->close();
                parent::getWriteFile()->close();

                // dataset description, one per release archive so that every
                // output file is described (not only the last one processed)
                $dd .= $this->describeDailymedFile($rfile, $file, $lfile, $odir, $ofile);
            }
        }
        parent::writeToReleaseFile($dd);
    }

    /**
     * Extracts one per-label zip from a release archive into the temporary
     * directory and converts every XML document it contains.
     *
     * Already-extracted files are reused; nothing is deleted afterwards.
     *
     * @param string $lfile    path of the release archive on disk
     * @param string $entry    name of the per-label zip inside the release archive
     * @param string $tdir     temporary directory (with trailing slash)
     * @param string $filetype short name; also the method invoked on each XML file
     */
    private function processDailymedEntry($lfile, $entry, $tdir, $filetype)
    {
        // extract the dailymed entry (zip file) as a temporary file
        $tfile = $tdir.basename($entry);
        if (!file_exists($tfile)) {
            copy("zip://".$lfile."#".$entry, $tfile);
        }

        // read the dailymed entry zip file
        $zin2 = new ZipArchive();
        if ($zin2->open($tfile) !== true) {
            trigger_error("Unable to open $tfile", E_USER_ERROR);
            exit;
        }

        // now find, extract, and process the xml file(s)
        $members = $zin2->count();
        for ($j = 0; $j < $members; $j++) {
            $f = $zin2->getNameIndex($j);
            if ($f === false || !strstr($f, ".xml")) {
                continue;
            }

            // keep a gzip-compressed copy of the XML in the temporary directory
            $gzxml = $tdir.basename($f).".gz";
            if (!file_exists($gzxml)) {
                copy("zip://".$tfile."#".$f, "compress.zlib://".$gzxml);
            }

            $this->setReadFile($gzxml);
            $this->$filetype($gzxml);
            $this->getReadFile()->close();
            $this->clear();
        }
        $zin2->close();
    }

    /**
     * Builds the provenance description for one source archive and the
     * output file generated from it.
     *
     * @param string $rfile remote URL of the source archive
     * @param string $file  file name of the source archive
     * @param string $lfile local path of the source archive
     * @param string $odir  output directory (with trailing slash)
     * @param string $ofile name of the generated output file
     * @return string concatenated toRDF() output of the source and output descriptions
     */
    private function describeDailymedFile($rfile, $file, $lfile, $odir, $ofile)
    {
        // NOTE(review): "dailmed" in the dataset URI below looks like a typo
        // for "dailymed"; left unchanged because it alters emitted URIs - confirm.
        $source_file = (new DataResource($this))
            ->setURI($rfile)
            ->setTitle("Dailymed: $file")
            ->setRetrievedDate(parent::getDate(filemtime($lfile)))
            ->setFormat("application/xml")
            ->setPublisher("https://dailymed.nlm.nih.gov")
            ->setHomepage("https://dailymed.nlm.nih.gov")
            ->setRights("use")
            ->setLicense("http://creativecommons.org/licenses/by-nd/3.0/")
            ->setDataset("http://identifiers.org/dailmed/");

        $prefix = parent::getPrefix();
        $bVersion = parent::getParameterValue('bio2rdf_release');
        $date = parent::getDate(filemtime($odir.$ofile));

        $output_file = (new DataResource($this))
            ->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/$ofile")
            ->setTitle("Bio2RDF v$bVersion RDF version of $prefix")
            ->setSource($source_file->getURI())
            ->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/dailymed/dailymed.php")
            ->setCreateDate($date)
            ->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
            ->setPublisher("http://bio2rdf.org")
            ->setRights("use-share-modify")
            ->setRights("by-attribution")
            ->setRights("restricted-by-source-license")
            ->setLicense("http://creativecommons.org/licenses/by/3.0/")
            ->setDataset(parent::getDatasetURI());

        $format = parent::getParameterValue('output_format');
        if (strstr($format, ".gz") !== false) {
            $output_file->setFormat("application/gzip");
        }
        if (strstr($format, "nt")) {
            $output_file->setFormat("application/n-triples");
        } else {
            $output_file->setFormat("application/n-quads");
        }

        return $source_file->toRDF().$output_file->toRDF();
    }

    /**
     * Converts an over-the-counter label; handled exactly like a
     * prescription label.
     *
     * @param string $file path of the XML document to convert
     */
    public function otc($file)
    {
        return $this->prescription($file);
    }

    /**
     * Converts one label document: describes the label itself and attaches
     * the flattened text of each top-level section whose code is 34067-9
     * or 42229-5.
     *
     * Only the whole section text is emitted; nested sub-sections and
     * individual paragraphs are not triplified separately.
     *
     * @param string $file path of the XML document to convert
     */
    public function prescription($file)
    {
        $xml = new CXML($file);
        parent::addRDF(
            parent::describeClass(parent::getVoc()."Indication-Section", "FDA product label indication section")
        );

        $section_codes = array("34067-9", "42229-5"); // indications

        while ($xml->parse("document") == true) {
            $doc = $xml->getXMLRoot();
            $setid = $doc->setId->attributes()->root;
            $id = parent::getNamespace().$setid;
            $title = addslashes(str_replace(array("\t", "\r", "\n", '"'), array(" ", "", "", ""), (string) $doc->title));
            #@todo look elsewhere if empty

            $type_id = "loinc:".$doc->code->attributes()->code;
            $type_label = (string) $doc->code->attributes()->displayName;

            parent::addRDF(
                parent::describeIndividual($id, $title, $type_id).
                parent::describeClass($type_id, $type_label)
            );

            foreach ($doc->component->structuredBody->component as $c) {
                $section = $c->section;
                // a section without a code attribute yields '' and is skipped
                $code = isset($section->code['code']) ? (string) $section->code['code'] : '';
                if (!in_array($code, $section_codes, true)) {
                    continue;
                }
                if (!isset($section->text)) {
                    continue;
                }

                $section_type_id = "loinc:$code";
                $markup = (string) $section->text->asXML();
                // the identifier is derived from the raw markup, so identical
                // section text always maps to the same resource
                $sid = parent::getRes().md5($markup);

                // flatten to plain ASCII text: drop tags, non-ASCII
                // characters, quotes and backslashes, collapse whitespace
                $text = preg_replace('/(?i)<[^>]*>/', ' ', $markup);
                setlocale(LC_ALL, 'en_GB');
                $ascii = @iconv('UTF-8', 'ASCII//IGNORE', $text);
                if ($ascii === false) {
                    // iconv can fail on malformed input; strip non-ASCII
                    // bytes by hand rather than losing the whole section
                    $ascii = preg_replace('/[^\x00-\x7F]/', '', $text);
                }
                $text = str_replace(array('"', "'", '\\'), '', $ascii);
                $text = trim(preg_replace("/\s+/", ' ', $text));
                $text = addslashes($text);

                parent::addRDF(
                    parent::describeIndividual($sid, "indication section", $section_type_id).
                    parent::triplifyString($sid, "rdf:value", $text).
                    parent::triplifyString($sid, parent::getVoc()."strlen", strlen($text)).
                    parent::triplify($id, parent::getVoc()."indicationSection", $sid)
                );
            }
        }
        unset($xml);
        parent::writeRDFBufferToWriteFile();
    }
}
297+
?>

0 commit comments

Comments
 (0)