Skip to content

Commit 1933d40

Browse files
Merge pull request #424 from micheldumontier/release3
variety of fixes for release 4
2 parents 7fcb7bd + be45220 commit 1933d40

File tree

15 files changed

+120
-109
lines changed

15 files changed

+120
-109
lines changed

affymetrix/affymetrix.php

+1-1
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ class AffymetrixParser extends Bio2RDFizer
3232
function __construct($argv) {
3333
parent::__construct($argv,"affymetrix");
3434
parent::addParameter('files',true,null,'all','');
35-
parent::addParameter('version',false,null,'33','to set another version to parse from');
35+
parent::addParameter('version',false,null,'35','to set another version to parse from');
3636
parent::addParameter('download_url',false,null,'http://www.affymetrix.com/support/technical/annotationfilesmain.affx','');
3737
parent::initialize();
3838
}

bioportal/bioportal.php

+18-10
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,8 @@ function __construct($argv) {
3636
parent::__construct($argv,'bioportal');
3737
parent::addParameter('files',true,null,'all','all or comma-separated list of ontology short names to process');
3838
parent::addParameter('download_url',false,null,'http://data.bioontology.org/');
39-
parent::addParameter('exclude',false,null,null,null,'ontologies to exclude - use acronyms');
39+
parent::addParameter('exclude',false,null,"AURA",'ontologies to exclude - use acronyms');
40+
parent::addParameter('continue_from',false,null,"",'the ontology abbreviation to restart from');
4041
parent::addParameter('ncbo_api_key',false,null,null,'BioPortal API key (please use your own)');
4142
parent::addParameter('ncbo_api_key_file',false,null,'ncbo.api.key','BioPortal API key file');
4243
parent::addParameter('detail',false,'min|min+|max','max','min:generate rdfs:label and rdfs:subClassOf axioms; min+: min + owl axioms');
@@ -82,23 +83,30 @@ function Run()
8283
if(parent::getParameterValue('exclude') != '') {
8384
$exclude_list = explode(",",parent::getParameterValue('exclude'));
8485
}
85-
86+
$continue_from = parent::getParameterValue('continue_from');
87+
$go = true;
88+
if($continue_from) $go = false;
8689
// now go through the list of ontologies
90+
8791
$ontologies = json_decode(file_get_contents($olist), false);
8892
$total = count($ontologies);
8993
foreach($ontologies AS $i => $o) {
9094
$label = (string) $o->name;
9195
$abbv = (string) $o->acronym;
96+
97+
if($continue_from and $continue_from == $abbv) $go = true;
98+
if($go == false) continue;
99+
92100
if(array_search($abbv,$exclude_list) !== FALSE) {
93101
continue;
94102
}
95103
if($include_list[0] != 'all') {
96104
// ignore if we don't find it in the include list OR we do find it in the exclude list
97-
if( (array_search($abbv,$include_list) === FALSE)
98-
|| (array_search($abbv,$exclude_list) !== FALSE) ) {
99-
//echo "skipping $label ($abbv format=$format)".PHP_EOL;
105+
if(array_search($abbv,$include_list) === FALSE) {
100106
continue;
101107
}
108+
} else if(array_search($abbv,$exclude_list) !== FALSE ) {
109+
continue;
102110
}
103111

104112
// get info on the latest submission
@@ -117,7 +125,7 @@ function Run()
117125
$rfile = $ls['ontology']['links']['download'];
118126

119127
$lfile = $abbv.".".$format.".gz";
120-
if(parent::getParameterValue('download') == 'true') {
128+
if(!file_exists($idir.$lfile) or parent::getParameterValue('download') == 'true') {
121129
echo "downloading ... ";
122130

123131
$ch = curl_init(); // create cURL handle (ch)
@@ -202,16 +210,16 @@ function Run()
202210
->setRights("by-attribution")
203211
->setRights("restricted-by-source-license")
204212
->setLicense("http://creativecommons.org/licenses/by/3.0/")
205-
->setDataset(parent::getDatasetURI());
213+
->setDataset(parent::getDatasetURI());
206214

207215
if($gz) $output_file->setFormat("application/gzip");
208-
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
209-
else $output_file->setFormat("application/n-quads");
216+
if(strstr(parent::getParameterValue('output_format'),"nt")) $output_file->setFormat("application/n-triples");
217+
else $output_file->setFormat("application/n-quads");
210218

211219
if(!isset($dd)) {
212220
$dd = fopen($odir.'bio2rdf-bioportal.nq',"w");
213221
}
214-
fwrite($dd, $source_file->toRDF().$output_file->toRDF());
222+
fwrite($dd, $source_file->toRDF().$output_file->toRDF());
215223
fflush($dd);
216224
echo "done!".PHP_EOL;
217225
}

dbsnp/dbsnp.php

+3-4
Original file line number | Diff line number | Diff line change
@@ -151,7 +151,7 @@ function getSNPs($clinical_flag = false)
151151
$clinical = array("pathogenic","probable-pathogenic","drug-response","other");
152152
if($clinical_flag == true) {
153153
$term = implode("[Clinical Significance] or ",$all);
154-
$term = '"'.substr($term,0)."\"[Clinical Significance]";
154+
$term = '"'.substr($term,0)."[Clinical Significance]";
155155
} else {
156156
$term = "snp";
157157
}
@@ -160,16 +160,15 @@ function getSNPs($clinical_flag = false)
160160
echo "Downloading snp list ";
161161
$xmlfile = $this->getParameterValue('indir').'snp.list.xml';
162162
$retmax = 10000000;
163-
// $retmax = 10;
164163
$start = 0;
165164
$mylist = array();
166165
do {
167166
echo count($mylist).PHP_EOL;
168167
$url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=snp&retmax=$retmax&term=".urlencode($term)."&retstart=$start";
169-
170168
$c = file_get_contents($url);
171169
preg_match_all("/<Id>([^\<]+)<\/Id>/",$c,$m);
172-
if(!isset($m[1])) break;
170+
if(!isset($m[1]) or count($m[1]) == 0) break;
171+
173172
$mylist = array_merge($mylist,$m[1]);
174173
$start += $retmax;
175174
} while(true);

drugbank/drugbank.php

+1
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,7 @@ function parsePartnerEntry($did, $pid, $x)
226226
if($k2 == 'external-identifier') {
227227
$ns = $this->NSMap($v2->resource);
228228
$id = (string) $v2->identifier;
229+
$id = str_replace(array(" "),"",$id);
229230
$id = str_replace(array("HGNC:","GNC:"),"",$id);
230231
parent::addRDF(
231232
parent::triplify($pid, parent::getVoc()."x-$ns","$ns:$id")

genage/genage.php

+56-71
Original file line number | Diff line number | Diff line change
@@ -215,30 +215,48 @@ function human(){
215215
return false;
216216
}
217217

218+
/*
219+
[0] GenAge ID
220+
[1] symbol
221+
[2] aliases
222+
[3] name
223+
[4] why
224+
[5] entrez gene id
225+
[6] swissprot/uniprot
226+
[7] band
227+
[8] location start
228+
[9] location end
229+
[10] orientation
230+
[11] acc promoter
231+
[12] acc orf
232+
[13] acc cds
233+
[14] references
234+
[15] orthologs
235+
*/
236+
218237
while($l = parent::getReadFile()->read(200000)) {
219238
$data = str_getcsv($l);
239+
220240
$hagr = str_pad($data[0], 4, "0", STR_PAD_LEFT);
221-
$aliases = $data[1];
222-
$hgnc_symbol = $data[2];
223-
$common_name = $data[3];
224-
$ncbi_gene_id = $data[4];
225-
$reasons = $data[5];
226-
$band = $data[6];
227-
$location_start = $data[7];
228-
$location_end = $data[8];
229-
$orientation = $data[9];
230-
$unigene_id = $data[10];
231-
$swissprot = $data[11];
232-
$acc_promoter = $data[12];
233-
$acc_orf = $data[13];
234-
$acc_cds = $data[14];
235-
$references = $data[15];
236-
// $ppis = $data[16];
237-
// $notes = $data[17];
241+
$hgnc_symbol = $data[1];
242+
$aliases = $data[2];
243+
$label = $data[3];
244+
$reasons = $data[4];
245+
$ncbigeneid = $data[5];
246+
$swissprot = $data[6];
247+
$band = $data[7];
248+
$location_start = $data[8];
249+
$location_end = $data[9];
250+
$orientation = $data[10];
251+
$acc_promoter = $data[11];
252+
$acc_orf = $data[12];
253+
$acc_cds = $data[13];
254+
$references = $data[14];
255+
$orthologs = $data[15];
238256

239257
$hagr_id = "hagr:".$hagr;
240258
parent::addRDF(
241-
parent::describeIndividual($hagr_id, $data[3], parent::getVoc()."Human-Aging-Related-Gene").
259+
parent::describeIndividual($hagr_id, $label, parent::getVoc()."Human-Aging-Related-Gene").
242260
parent::describeClass(parent::getVoc()."Human-Aging-Related-Gene","Human Aging Related Gene")
243261
);
244262

@@ -252,17 +270,16 @@ function human(){
252270
}
253271

254272
parent::addRDF(
255-
parent::triplifyString($hagr_id, parent::getVoc()."hgnc-symbol", parent::safeLiteral($hgnc_symbol))
273+
parent::triplify($hagr_id, parent::getVoc()."x-hgnc.symbol", "hgnc.symbol:".parent::safeLiteral($hgnc_symbol))
256274
);
257275

258276
parent::addRDF(
259-
parent::triplify($hagr_id, parent::getVoc()."x-ncbigene", "ncbigene:".$ncbi_gene_id)
277+
parent::triplify($hagr_id, parent::getVoc()."x-ncbigene", "ncbigene:".$ncbigeneid)
260278
);
261279

262280
if($reasons !== ""){
263281
$reasons_split = explode(",", $reasons);
264282
foreach($reasons_split as $reason){
265-
266283
parent::addRDF(
267284
parent::triplify($hagr_id, parent::getVoc()."inclusion-criteria", parent::getVoc().$inclusion_criteria[$reason][0])
268285
);
@@ -292,12 +309,6 @@ function human(){
292309
);
293310
}
294311

295-
if($unigene_id !== ""){
296-
parent::addRDF(
297-
parent::triplify($hagr_id, parent::getVoc()."x-unigene", "unigene:".$unigene_id)
298-
);
299-
}
300-
301312
if($swissprot !== ""){
302313
if(strstr($swissprot, "_")){
303314
parent::addRDF(
@@ -368,32 +379,36 @@ function models(){
368379
);
369380

370381
$h = explode(",", parent::getReadFile()->read());
371-
$expected_columns = 10;
382+
$expected_columns = 8;
372383
if(($n = count($h)) != $expected_columns) {
373384
trigger_error("Found $n columns in gene file - expecting $expected_columns!", E_USER_WARNING);
374385
return false;
375386
}
376387

388+
/*
389+
[0] GenAge ID
390+
[1] symbol
391+
[2] name
392+
[3] organism
393+
[4] entrez gene id
394+
[5] avg lifespan change (max obsv)
395+
[6] lifespan effect
396+
[7] longevity influence
397+
*/
377398
while($l = parent::getReadFile()->read(200000)) {
378399
$data = str_getcsv($l);
379-
400+
380401
$genage = str_pad($data[0], 4, "0", STR_PAD_LEFT);
381-
$name = $data[1];
382-
$gene_symbol = $data[2];
402+
$gene_symbol = $data[1];
403+
$name = $data[2];
383404
$organism = $data[3];
384-
$function = $data[4];
385-
$ncbi_gene_id = $data[5];
386-
// $ensembl_id = $data[6];
387-
// $uniprot_id = $data[7];
388-
// $unigene_id = $data[8];
389-
$max_percent_obsv_avg_lifespan_change = $data[6];
390-
$lifespan_effect = $data[7];
391-
$longevity_influence = $data[8];
392-
$observations = $data[9];
405+
$ncbi_gene_id = $data[4];
406+
$max_percent_obsv_avg_lifespan_change = $data[5];
407+
$lifespan_effect = $data[6];
408+
$longevity_influence = $data[7];
393409

394410
$genage_id = parent::getNamespace().$genage;
395411

396-
397412
parent::addRDF(
398413
parent::describeIndividual($genage_id, $name, parent::getVoc()."Aging-Related-Gene").
399414
parent::describeClass(parent::getVoc()."Aging-Related-Gene","Aging Related Gene")
@@ -407,42 +422,12 @@ function models(){
407422
parent::triplify($genage_id, parent::getVoc()."taxon", "ncbitaxon:".$tax_ids[$organism])
408423
);
409424

410-
if($function !== ""){
411-
parent::addRDF(
412-
parent::triplifyString($genage_id, parent::getVoc()."function", parent::safeLiteral($function))
413-
);
414-
}
415-
416425
if($ncbi_gene_id !== ""){
417426
parent::addRDF(
418427
parent::triplify($genage_id, parent::getVoc()."x-ncbigene", "ncbigene:".$ncbi_gene_id)
419428
);
420429
}
421-
/*
422-
423-
if($ensembl_id !== ""){
424-
parent::addRDF(
425-
parent::triplify($genage_id, parent::getVoc()."x-ensembl", "ensembl:".$ensembl_id)
426-
);
427-
}
428-
if($uniprot_id !== ""){
429-
if(strstr($uniprot_id, "_")){
430-
parent::addRDF(
431-
parent::triplifyString($genage_id, parent::getVoc()."uniprot-entry", parent::safeLiteral($uniprot_id))
432-
);
433-
} else {
434-
parent::addRDF(
435-
parent::triplify($genage_id, parent::getVoc()."x-uniprot", "uniprot:".$uniprot_id)
436-
);
437-
}
438-
}
439430

440-
if($unigene_id !== ""){
441-
parent::addRDF(
442-
parent::triplify($genage_id, parent::getVoc()."x-unigene", "unigene:".$unigene_id)
443-
);
444-
}
445-
*/
446431
if($max_percent_obsv_avg_lifespan_change !== ""){
447432
parent::addRDF(
448433
parent::triplifyString($genage_id, parent::getVoc()."maximum-percent-observed-average-lifespan-change", parent::safeLiteral($max_percent_obsv_avg_lifespan_change))

gendr/gendr.php

+3-3
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
class GendrParser extends Bio2RDFizer {
2525
function __construct($argv) {
2626
parent::__construct($argv, "gendr");
27-
parent::addParameter('files', true, 'all|gene_manipulations|gene_expression','all','files to process');
27+
parent::addParameter('files', true, 'all|gene_manipulations' /* |gene_expression -- no longer supported*/ ,'all','files to process');
2828
parent::addParameter('download_url', false, null,'http://genomics.senescence.info/diet/');
2929
parent::initialize();
3030
}//constructor
@@ -184,7 +184,7 @@ function process(){
184184

185185
function gene_manipulations(){
186186
$h = explode(",", parent::getReadFile()->read());
187-
$expected_columns = 6;
187+
$expected_columns = 5;
188188
if(($n = count($h)) != $expected_columns) {
189189
trigger_error("Found $n columns in gene file - expecting $expected_columns!", E_USER_WARNING);
190190
return false;
@@ -197,7 +197,7 @@ function gene_manipulations(){
197197
$species_name = $data[2];
198198
$geneid = $data[3];
199199
$gene_name = $data[4];
200-
$references = $data[5];
200+
// $references = $data[5];
201201

202202
$gendr_id = parent::getNamespace().$gendr;
203203
$gendr_label = $gene_name." (".$gene_symbol.")";

interpro/interpro.php

+9-8
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@ function Run()
5959
file_put_contents($lfile,$ret);
6060
}
6161
echo "Loading XML file...";
62-
$cxml = new CXML($ldir,$file);
62+
$cxml = new CXML($lfile);
6363
$cxml->Parse();
6464
$xml = $cxml->GetXMLRoot();
6565
echo "Done".PHP_EOL;
@@ -182,14 +182,15 @@ function Parse($xml)
182182
parent::triplifyString($s,"dc:description",$abstract)
183183
);
184184

185-
foreach($o->example_list->example AS $example) {
186-
$db = (string) $example->db_xref->attributes()->db;
187-
$id = (string) $example->db_xref->attributes()->dbkey;
188-
parent::addRDF(
189-
parent::triplify($s,parent::getVoc()."example-entry", "$db:$id")
190-
);
185+
if(isset($o->example_list)) {
186+
foreach($o->example_list->example AS $example) {
187+
$db = (string) $example->db_xref->attributes()->db;
188+
$id = (string) $example->db_xref->attributes()->dbkey;
189+
parent::addRDF(
190+
parent::triplify($s,parent::getVoc()."example-entry", "$db:$id")
191+
);
192+
}
191193
}
192-
193194
if(isset($o->parent_list->rel_ref)) {
194195
foreach($o->parent_list->rel_ref AS $parent) {
195196
$id = (string) $parent->attributes()->ipr_ref;

irefindex/irefindex.php

+1-1
Original file line number | Diff line number | Diff line change
@@ -296,7 +296,7 @@ function Parse()
296296
);
297297
}
298298

299-
$list = explode("|",$a[3]);
299+
$list = explode("|",$a[3+($i-2)]);
300300
foreach($list AS $item) {
301301
$data = $this->ParseStringArray($item);
302302
$ns = trim($data["ns"]);

kegg/kegg.php

+1
Original file line number | Diff line number | Diff line change
@@ -798,6 +798,7 @@ function parseKGML($lfile)
798798
parent::describeClass(parent::getVoc()."Pathway-Relation","KEGG Pathway Relation").
799799
parent::triplify($relation_id, parent::getVoc()."source", $base_id.$id1).
800800
parent::triplify($relation_id, parent::getVoc()."target", $base_id.$id2).
801+
parent::triplify($relation_id, parent::getVoc()."pathway", $pathway_id).
801802
parent::triplifyString($relation_id, parent::getVoc()."type", $item['type'])
802803
);
803804
foreach($item->children() as $subtype) {

mesh/mesh.php

+3
Original file line number | Diff line number | Diff line change
@@ -302,6 +302,9 @@ private function qualifiers(){
302302
*/
303303
private function makeSupplementaryRecord($sup_record_arr){
304304
//get the UI of the supplementary record
305+
306+
if(!isset($sup_record_arr['UI'][0]) or !isset($sup_record_arr['NM'][0])) return;
307+
305308
$sr_ui = $sup_record_arr["UI"][0];
306309
$sr_res = $this->getNamespace().$sr_ui;
307310
$sr_label = $sup_record_arr['NM'][0];

0 commit comments

Comments
 (0)