Skip to content

Commit c971c6c

Browse files
Merge pull request #269 from micheldumontier/clinicaltrials
Clinicaltrials - update
2 parents 7e44556 + 10662e7 commit c971c6c

File tree

1 file changed

+60
-1
lines changed

1 file changed

+60
-1
lines changed

clinicaltrials/clinicaltrials.php

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,19 +150,78 @@ function fetch_page($url){
150150
/**
 * Process the trial XML files found in the 'indir' directory and write a
 * dataset description (provenance) record for each processed file.
 *
 * Each directory entry that is not ignored, is not a sub-directory, and
 * matches the optional comma-separated 'id_list' parameter (an empty list
 * means "all files") is handed to process_file(). A source/output
 * DataResource pair is then serialised into the Bio2RDF release file that
 * lives in 'outdir'.
 *
 * @return bool|null false when the release file or the input directory
 *                   cannot be opened; nothing (null) otherwise.
 */
function parse_dir(){
	$ignore = array("..",'.','.DS_STORE',"0");
	$this->setCheckPoint('dataset');

	$prefix = parent::getPrefix();
	$bVersion = parent::getParameterValue('bio2rdf_release');
	// Timestamps carry a literal "Z" suffix, so they must be produced in UTC
	// (gmdate) with a zero-padded hour ("H"); "G" yields e.g. "T9:05:00Z".
	$date_format = 'Y-m-d\TH:i:s\Z';
	$date = gmdate($date_format);

	$dataset_file = parent::getParameterValue("outdir").parent::getBio2RDFReleaseFile();
	$fp = fopen($dataset_file,"w");
	if($fp === FALSE) {
		trigger_error("Unable to open $dataset_file",E_USER_ERROR);
		return false;
	}

	// Loop-invariant parameters are read once up front.
	$id_list = parent::getParameterValue('id_list');
	$process_all = ($id_list == '');
	$ids = array_map('trim', explode(",",$id_list));

	$output_format = parent::getParameterValue('output_format');
	$gz = (strstr($output_format,".gz") !== FALSE);
	$is_nt = (bool) strstr($output_format,"nt");

	$indir = parent::getParameterValue('indir');
	// Normalised directory prefix so paths resolve whether or not 'indir'
	// was given with a trailing separator.
	$indir_path = rtrim($indir,"/\\").DIRECTORY_SEPARATOR;

	$handle = opendir($indir);
	if($handle === FALSE) {
		// Do not leak the release-file handle when the input directory is unusable.
		fclose($fp);
		trigger_error("Unable to open directory $indir",E_USER_ERROR);
		return false;
	}

	echo "Processing directory $indir\n";
	while(($file = readdir($handle)) !== false){
		$path = $indir_path.$file;
		// is_dir() must be given the full path; a bare entry name would be
		// resolved against the current working directory instead of $indir.
		if (in_array($file,$ignore) || is_dir($path)) continue;

		$trial_id = basename($file,'.xml');
		if(!$process_all && !in_array($trial_id, $ids, true)) continue;

		echo "Processing $file".PHP_EOL;
		$this->process_file($file);

		$outfile = $trial_id.'.'.$output_format;

		// make the dataset description; the description is emitted with the
		// dataset URI as the graph URI, and the previous graph URI is put
		// back afterwards
		$ouri = parent::getGraphURI(parent::getDatasetURI());
		parent::setGraphURI(parent::getDatasetURI());

		$rfile = "http://clinicaltrials.gov/ct2/show/".$trial_id."?resultsxml=true";
		$source_version = parent::getDatasetVersion();

		// description of the downloaded source record
		$source_file = (new DataResource($this))
			->setURI($rfile)
			->setTitle("Clinicaltrials")
			->setRetrievedDate(gmdate($date_format, filemtime($path)))
			->setFormat("application/xml")
			->setPublisher("http://clinicaltrials.gov/")
			->setHomepage("http://clinicaltrials.gov/")
			->setRights("use")
			->setRights("by-attribution")
			->setLicense("http://clinicaltrials.gov/ct2/about-site/terms-conditions")
			->setDataset("http://identifiers.org/clinicaltrials/");

		// description of the generated RDF file
		$output_file = (new DataResource($this))
			->setURI("http://download.bio2rdf.org/release/$bVersion/$prefix/$outfile")
			->setTitle("Bio2RDF v$bVersion RDF version of $prefix v$source_version")
			->setSource($source_file->getURI())
			->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/clinicaltrials/clinicaltrials.php")
			->setCreateDate($date)
			->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
			->setPublisher("http://bio2rdf.org")
			->setRights("use-share-modify")
			->setRights("by-attribution")
			->setRights("restricted-by-source-license")
			->setLicense("http://creativecommons.org/licenses/by/3.0/")
			->setDataset(parent::getDatasetURI());

		if($gz) $output_file->setFormat("application/gzip");
		if($is_nt) $output_file->setFormat("application/n-triples");
		else $output_file->setFormat("application/n-quads");

		fwrite($fp, $source_file->toRDF().$output_file->toRDF());

		// Restore the graph URI saved above. NOTE(review): the previous code
		// passed $ouri through setDatasetURI(), which would overwrite the
		// dataset URI with the graph URI — confirm against the parent API.
		parent::setGraphURI($ouri);
	}
	echo "Finished\n.";
	closedir($handle);

	// finish the dataset description file
	fclose($fp);
}
168227

0 commit comments

Comments
 (0)