Skip to content

Commit c154d9a

Browse files
Merge remote-tracking branch 'upstream/release3' into pubmed-release3
2 parents 1df2aa3 + 383ebc7 commit c154d9a

File tree

11 files changed

+1666
-1328
lines changed

11 files changed

+1666
-1328
lines changed

affymetrix/affymetrix.php

Lines changed: 66 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class AffymetrixParser extends Bio2RDFizer
3232
function __construct($argv) {
3333
parent::__construct($argv,"affymetrix");
3434
parent::addParameter('files',true,null,'all','');
35+
parent::addParameter('version',false,null,'33','to set another version to parse from');
3536
parent::addParameter('download_url',false,null,'http://www.affymetrix.com/support/technical/annotationfilesmain.affx','');
3637
parent::initialize();
3738
}
@@ -76,36 +77,36 @@ function Run()
7677
}
7778
}
7879
if(!isset($myfiles)) exit; // nothing to do
79-
80-
80+
$dataset_description = '';
81+
8182
// iterate over the files
82-
83-
// print_r($myfiles);
8483
foreach($myfiles AS $rfile) {
85-
// download
8684
$base_file = substr($rfile,strrpos($rfile,"/")+1);
8785
$base_url = substr($rfile,0, strrpos($rfile,"/"));
88-
echo "processing $base_file, from $base_url".PHP_EOL;
89-
$csv_file = $base_file.".csv";
90-
$zip_file = $csv_file.".zip";
91-
92-
$lfile = $ldir.$zip_file;
93-
if(!file_exists($lfile) || $this->GetParameterValue('download') == true) {
94-
$rfile = $url.$zip_file;
95-
trigger_error("Downloading $zip_file from $rfile", E_USER_NOTICE);
96-
if(Utils::Download($base_url,array($zip_file),$ldir) === FALSE) {
97-
trigger_error("Unable to download $file. skipping", E_USER_WARNING);
98-
continue;
99-
}
100-
}
10186

102-
// set the dataset version
87+
// get and set the dataset version
10388
if(parent::getDatasetVersion() == null) {
10489
preg_match("/\.na([0-9]{2})\.annot/",$base_file,$m);
10590
if(isset($m[1])) {
10691
$this->setDatasetVersion($m[1]);
10792
}
10893
}
94+
if(parent::getDatasetVersion() != parent::getParameterValue('version')) {
95+
$base_file = str_replace(
96+
"na".parent::getDatasetVersion(),
97+
"na".parent::getParameterValue('version'),
98+
$base_file);
99+
}
100+
101+
$csv_file = $base_file.".csv";
102+
$zip_file = $csv_file.".zip";
103+
104+
$lfile = $ldir.$zip_file;
105+
if(!file_exists($lfile)) {
106+
echo "skipping: $lfile does not exist".PHP_EOL;
107+
continue;
108+
}
109+
echo "processing $base_file, from $base_url".PHP_EOL;
109110

110111
// open the zip file
111112
$zin = new ZipArchive();
@@ -119,48 +120,64 @@ function Run()
119120
return FALSE;
120121
}
121122

122-
$this->SetReadFile($lfile);
123-
$this->GetReadFile()->SetFilePointer($fp);
123+
parent::setReadFile($lfile);
124+
parent::getReadFile()->setFilePointer($fp);
124125

125126
// set the write file
126-
$outfile = $base_file.'.nt'; $gz=false;
127-
if($this->GetParameterValue('graph_uri')) {$outfile = $base_file.'.nq';}
128-
if($this->GetParameterValue('gzip')) {
129-
$outfile .= '.gz';
130-
$gz = true;
131-
}
132-
$this->setWriteFile($odir.$outfile, $gz);
127+
$gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true;
128+
$outfile = 'affymetrix-'.$base_file.".".parent::getParameterValue('output_format');
133129

134-
// parse the file
130+
$this->setWriteFile($odir.$outfile, $gz);
135131
$this->parse();
136-
137132
parent::getWriteFile()->close();
138133
parent::getReadFile()->close();
134+
parent::clear();
139135

140-
$bio2rdf_download_files[] = $this->getBio2RDFDownloadURL($this->getNamespace()).$outfile;
136+
// dataset description
137+
$source_file = (new DataResource($this))
138+
->setURI($rfile)
139+
->setTitle("Affymetrix Probeset : $base_file")
140+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($lfile)))
141+
->setFormat("text/tab-separated-value")
142+
->setFormat("application/zip")
143+
->setPublisher("http://affymetrix.com")
144+
->setHomepage("http://www.affymetrix.com/support/technical/annotationfilesmain.affx")
145+
->setRights("use")
146+
->setRights("no-commercial")
147+
->setRights("registration-required")
148+
->setLicense("http://www.affymetrix.com/about_affymetrix/legal/index.affx")
149+
->setDataset("http://identifiers.org/affy.probeset/");
141150

142-
parent::clear();
143-
}
144-
151+
$prefix = parent::getPrefix();
152+
$bVersion = parent::getParameterValue('bio2rdf_release');
153+
$date = date ("Y-m-d\TG:i:s\Z");
154+
$output_file = (new DataResource($this))
155+
->setURI("http://download.bio2df.org/release/$bVersion/$prefix/$outfile")
156+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix (generated at $date)")
157+
->setSource($source_file->getURI())
158+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php")
159+
->setCreateDate($date)
160+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
161+
->setPublisher("http://bio2rdf.org")
162+
->setRights("use-share-modify")
163+
->setRights("by-attribution")
164+
->setRights("restricted-by-source-license")
165+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
166+
->setDataset(parent::getDatasetURI());
145167

146-
// generate the release file
147-
$desc = $this->getBio2RDFDatasetDescription(
148-
$this->getNamespace(),
149-
"https://github.com/bio2rdf/bio2rdf-scripts/blob/master/affymetrix/affymetrix.php",
150-
$bio2rdf_download_files,
151-
"dsfsdfs",
152-
"http://affymetrix.com/",
153-
array("use-share-modify","no-commercial"),
154-
null, // license
155-
parent::getParameterValue('download_url'),
156-
parent::getDatasetVersion()
157-
);
158-
$this->setWriteFile($odir.$this->getBio2RDFReleaseFile($this->getNamespace()));
159-
$this->getWriteFile()->write($desc);
168+
if($gz) $output_file->setFormat("application/gzip");
169+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
170+
else $output_file->setFormat("application/n-quads");
171+
172+
$dataset_description .= $source_file->toRDF().$output_file->toRDF();
173+
}
174+
// write the dataset description
175+
$this->setWriteFile($odir.$this->getBio2RDFReleaseFile());
176+
$this->getWriteFile()->write($dataset_description);
160177
$this->getWriteFile()->close();
161178

162179
return true;
163-
}
180+
}
164181

165182
function Parse()
166183
{

clinicaltrials/clinicaltrials.php

Lines changed: 69 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -150,19 +150,78 @@ function fetch_page($url){
150150
function parse_dir(){
151151
$ignore = array("..",'.','.DS_STORE',"0");
152152
$this->setCheckPoint('dataset');
153+
154+
$prefix = parent::getPrefix();
155+
$bVersion = parent::getParameterValue('bio2rdf_release');
156+
$date = date ("Y-m-d\TG:i:s\Z");
157+
158+
$dataset_file = parent::getParameterValue("outdir").parent::getBio2RDFReleaseFile();
159+
$fp = fopen($dataset_file,"w");
160+
if($fp === FALSE) {
161+
trigger_error("Unable to open $dataset_file",E_USER_ERROR);
162+
return false;
163+
}
164+
$ids = explode(",",parent::getParameterValue('id_list'));
165+
153166
$indir = parent::getParameterValue('indir');
154167
if($handle = opendir($indir)) {
155168
echo "Processing directory $indir\n";
156169
while(($file = readdir($handle)) !== false){
157170
if (in_array($file,$ignore) || is_dir($file) ) continue;
158171
$trial_id = basename($file,'.xml');
159-
if(parent::getParameterValue('id_list') == '' || in_array($trial_id, explode(",",parent::getParameterValue('id_list')))) {
172+
if(parent::getParameterValue('id_list') == '' || in_array($trial_id, $ids)) {
160173
echo "Processing $file".PHP_EOL;
161174
$this->process_file($file);
175+
176+
$outfile = basename($file,".xml").'.'.parent::getParameterValue('output_format');
177+
178+
// make the dataset description
179+
$ouri = parent::getGraphURI(parent::getDatasetURI());
180+
parent::setGraphURI(parent::getDatasetURI());
181+
182+
$rfile = "http://clinicaltrials.gov/ct2/show/".$trial_id."?resultsxml=true";
183+
$source_version = parent::getDatasetVersion();
184+
// dataset description
185+
$source_file = (new DataResource($this))
186+
->setURI($rfile)
187+
->setTitle("Clinicaltrials")
188+
->setRetrievedDate( date ("Y-m-d\TG:i:s\Z", filemtime($indir.$file)))
189+
->setFormat("application/xml")
190+
->setPublisher("http://clinicaltrials.gov/")
191+
->setHomepage("http://clinicaltrials.gov/")
192+
->setRights("use")
193+
->setRights("by-attribution")
194+
->setLicense("http://clinicaltrials.gov/ct2/about-site/terms-conditions")
195+
->setDataset("http://identifiers.org/clinicaltrials/");
196+
197+
$output_file = (new DataResource($this))
198+
->setURI("http://download.bio2df.org/release/$bVersion/$prefix/$outfile")
199+
->setTitle("Bio2RDF v$bVersion RDF version of $prefix v$source_version")
200+
->setSource($source_file->getURI())
201+
->setCreator("https://github.com/bio2rdf/bio2rdf-scripts/blob/master/clinicaltrials/clinicaltrials.php")
202+
->setCreateDate($date)
203+
->setHomepage("http://download.bio2rdf.org/release/$bVersion/$prefix/$prefix.html")
204+
->setPublisher("http://bio2rdf.org")
205+
->setRights("use-share-modify")
206+
->setRights("by-attribution")
207+
->setRights("restricted-by-source-license")
208+
->setLicense("http://creativecommons.org/licenses/by/3.0/")
209+
->setDataset(parent::getDatasetURI());
210+
211+
$gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true;
212+
if($gz) $output_file->setFormat("application/gzip");
213+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
214+
else $output_file->setFormat("application/n-quads");
215+
216+
fwrite($fp, $source_file->toRDF().$output_file->toRDF());
217+
parent::setGraphURI(parent::setDatasetURI($ouri));
162218
}
163219
}
164220
echo "Finished\n.";
165221
closedir($handle);
222+
223+
// write the dataset description file
224+
fclose($fp);
166225
}
167226
}
168227

@@ -397,14 +456,16 @@ function process_file($infile) {
397456
);
398457
// Intervention Model: Parallel Assignment, Masking: Double-Blind, Primary Purpose: Treatment
399458
foreach(explode(", ",$study_design) AS $b) {
400-
$c = explode(": ",$b);
459+
$c = explode(": ",$b);
401460
$key = parent::getRes().md5($c[0]);
461+
if(isset($c[1])) {
402462
$value = parent::getRes().md5($c[1]);
403463
parent::addRDF(
404464
parent::describeClass($value,$c[1],parent::getVoc()."Study-Design-Parameter",$c[1]).
405465
parent::describeObjectProperty($key,$c[0],null,$c[0]).
406466
parent::triplify($study_design_id,$key, $value)
407467
);
468+
}
408469
}
409470
}
410471

@@ -527,6 +588,7 @@ function process_file($infile) {
527588
$arm_group_id = parent::getRes().md5($arm_group->asXML());
528589
$arm_group_label = $this->getString('./arm_group_label',$arm_group);
529590
$arm_group_type = ucfirst(str_replace(" ","_",$this->getString('./arm_group_type',$arm_group)));
591+
if(!$arm_group_type) $arm_group_type = "Clinical-Arm";
530592
$description = $this->getString('./description',$arm_group);
531593

532594
parent::addRDF(
@@ -790,10 +852,14 @@ function process_file($infile) {
790852
$name_title = $this->getString('//responsible_party/name_title');
791853
$organization = $this->getString('//responsible_party/organization');
792854
$party_type = $this->getString('//responsible_party/party_type');
855+
$label = '';
856+
if($name_title) $label = $name_title;
857+
if($organization) $label .= (($name_title !== '')?", ":"").$organization;
858+
if(!$label && $party_type) $label = $party_type;
793859

794860
parent::addRDF(
795861
parent::triplify($study_id,parent::getVoc()."responsible-party",$rp_id).
796-
parent::describeIndividual($rp_id,"$name_title, $organization",parent::getVoc()."Responsible-Party")
862+
parent::describeIndividual($rp_id,$label,parent::getVoc()."Responsible-Party")
797863
);
798864
if($party_type) parent::addRDF(parent::triplifyString($rp_id,parent::getVoc()."party-type",$party_type));
799865
if($name_title) parent::addRDF(parent::triplifyString($rp_id,parent::getVoc()."name-title",$name_title));

0 commit comments

Comments
 (0)