Skip to content

Commit 0735cc2

Browse files
Update omim.php
Updated to the HTTPS API URL; fixed ID list processing; now getting the full list of entries from mimTitles; added some new exclusions.
1 parent 865899b commit 0735cc2

File tree

1 file changed

+46
-69
lines changed

1 file changed

+46
-69
lines changed

Diff for: omim/omim.php

+46-69
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ class OMIMParser extends Bio2RDFizer
3333
function __construct($argv) {
3434
parent::__construct($argv, 'omim');
3535
parent::addParameter('files',true,null,'all|omim#','entries to process: comma-separated list or hyphen-separated range');
36-
parent::addParameter('omim_api_url',false,null,'http://api.omim.org/api/entry?include=all&format=json');
36+
parent::addParameter('omim_api_url',false,null,'https://api.omim.org/api/entry?include=all&format=json');
3737
parent::addParameter('omim_api_key',false,null);
3838
parent::addParameter('omim_api_key_file',false,null,'omim.key','A file containing your omim KEY');
3939
parent::initialize();
@@ -55,22 +55,38 @@ function Run()
5555
}
5656
} else {
5757
trigger_error("No OMIM key has been provided either by commmand line or in the expected omim key file $key_file",E_USER_WARNING);
58+
exit;
5859
}
5960
}
6061

61-
// get the list of mim2gene entries
62-
$entries = $this->GetListOfEntries($ldir);
62+
// get the list of entries
63+
$file = "mimTitles.txt";
64+
$rfile = "https://data.omim.org/downloads/$key/$file";
65+
$lfile = $ldir.$file;
66+
if(!file_exists($lfile) && parent::getParameterValue('download') == false) {
67+
trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
68+
parent::setParameterValue('download',true);
69+
}
70+
if(parent::getParameterValue('download') == true) {
71+
echo "downloading $file ... ";
72+
Utils::DownloadSingle($rfile, $lfile);
73+
}
74+
// parse the file
75+
$fp = fopen($lfile,"rb");
76+
while($l = fgetcsv($fp,0,"\t")) {
77+
if($l[0][0] == "#") continue;
78+
$full_list[ $l[1] ] = "";
79+
}
6380

81+
6482
// get the work specified
6583
$list = trim(parent::getParameterValue('files'));
6684
if($list != 'all') {
6785
// check if a hyphenated list was provided
6886
if(($pos = strpos($list,"-")) !== FALSE) {
6987
$start_range = substr($list,0,$pos);
7088
$end_range = substr($list,$pos+1);
71-
72-
// get the whole list
73-
$full_list = $this->GetListOfEntries($ldir);
89+
7490
// now intersect
7591
foreach($full_list AS $e => $type) {
7692
if($e >= $start_range && $e <= $end_range) {
@@ -84,9 +100,11 @@ function Run()
84100
foreach($b AS $e) {
85101
$myentries[$e] = '';
86102
}
87-
$entries = array_intersect_key ($entries,$myentries);
103+
$entries = array_intersect_key ($full_list,$myentries);
88104
}
89-
}
105+
} else $entries = $full_list;
106+
107+
echo "Will process a total of ".count($entries)." OMIM entries".PHP_EOL;
90108

91109
// set the write file
92110
$gz = (strstr(parent::getParameterValue('output_format'),".gz") === FALSE)?false:true;
@@ -102,17 +120,13 @@ function Run()
102120
$total = count($entries);
103121
foreach($entries AS $omim_id => $type) {
104122
echo "processing ".(++$i)." of $total - omim# ";
105-
$download_file = $ldir.$omim_id.".json.gz";
106-
$gzfile = "compress.zlib://$download_file";
123+
$lfile = $ldir.$omim_id.".json.gz";
124+
$gzfile = "compress.zlib://$lfile";
107125
// download if the file doesn't exist or we are told to
108-
if(!file_exists($download_file) || parent::getParameterValue('download') == true) {
126+
if(!file_exists($lfile) || parent::getParameterValue('download') == true) {
109127
// download using the api
110-
$url = parent::getParameterValue('omim_api_url').'&apiKey='.parent::getParameterValue('omim_api_key').'&mimNumber='.$omim_id;
111-
$buf = file_get_contents($url);
112-
if(strlen($buf) != 0) {
113-
file_put_contents($download_file, $buf);
114-
usleep(500000); // limit of 4 requests per second
115-
}
128+
$rfile = parent::getParameterValue('omim_api_url').'&apiKey='.parent::getParameterValue('omim_api_key').'&mimNumber='.$omim_id;
129+
Utils::DownloadSingle($rfile, $lfile);
116130
}
117131

118132
// load entry, parse and write to file
@@ -170,57 +184,6 @@ function Run()
170184
return TRUE;
171185
}
172186

173-
function getListOfEntries($ldir)
174-
{
175-
// get the master list of entries
176-
$file = "mim2gene.txt";
177-
if(!file_exists($ldir.$file)) {
178-
trigger_error($ldir.$file." not found. Will attempt to download. ", E_USER_NOTICE);
179-
$this->SetParameterValue('download',true);
180-
}
181-
182-
if(parent::getParameterValue('download')==true) {
183-
// connect
184-
if(!isset($ftp)) {
185-
$host = 'ftp.omim.org';
186-
echo "connecting to $host ...";
187-
$ftp = ftp_connect($host);
188-
if(!$ftp) {
189-
echo "Unable to connect to $host".PHP_EOL;
190-
die;
191-
}
192-
ftp_pasv ($ftp, true) ;
193-
$login = ftp_login($ftp, 'anonymous', '[email protected]');
194-
if ((!$ftp) || (!$login)) {
195-
echo "FTP-connect failed!"; die;
196-
} else {
197-
echo "Connected".PHP_EOL;
198-
}
199-
}
200-
201-
// download
202-
ftp_pasv($ftp, true);
203-
echo "Downloading $file ...";
204-
if(ftp_get($ftp, $ldir.$file, 'OMIM/'.$file, FTP_BINARY) === FALSE) {
205-
trigger_error("Error in downloading $file");
206-
}
207-
if(isset($ftp)) ftp_close($ftp);
208-
echo "success!".PHP_EOL;
209-
}
210-
211-
// parse the mim2gene file for the entries
212-
// # Mim Number Type Gene IDs Approved Gene Symbols
213-
$fp = fopen($ldir.$file,"r");
214-
fgets($fp);
215-
while($l = fgets($fp)) {
216-
$a = explode("\t",$l);
217-
if($a[1] != "moved/removed")
218-
$list[$a[0]] = $a[1];
219-
}
220-
fclose($fp);
221-
return $list;
222-
}
223-
224187

225188
function get_phenotype_mapping_method_type($id = null, $generate_declaration = false)
226189
{
@@ -326,6 +289,14 @@ function ParseEntry($obj, $type)
326289
}
327290
}
328291

292+
// check if moved
293+
if(isset($o['movedTo'])) {
294+
$new_omim_uri = parent::getNamespace().$o['movedTo'];
295+
parent::addRDF(
296+
parent::triplify($omim_uri, parent::getVoc()."superceded-by", $new_omim_uri)
297+
);
298+
}
299+
329300
// parse text sections
330301
if(isset($o['textSectionList'])) {
331302
foreach($o['textSectionList'] AS $i => $section) {
@@ -542,7 +513,8 @@ function ParseEntry($obj, $type)
542513

543514
$ns = '';
544515
switch($k) {
545-
case 'approvedGeneSymbols': $ns = 'symbol';break;
516+
case 'hgncID': $ns = 'hgnc';break;
517+
case 'approvedGeneSymbols': $ns = 'hgnc.symbol';break;
546518
case 'geneIDs': $ns = 'ncbigene';break;
547519
case 'ncbiReferenceSequences': $ns = 'gi';break;
548520
case 'genbankNucleotideSequences': $ns = 'gi';break;
@@ -568,6 +540,11 @@ function ParseEntry($obj, $type)
568540
case 'diseaseOntologyIDs': $ns = 'do';break;
569541

570542
// specifically ignoring
543+
case 'newbornScreening':
544+
case 'clinGenDosage':
545+
case 'clinGenValidity':
546+
case 'monarch':
547+
case 'decipherSyndromes':
571548
case 'geneTests':
572549
case 'cmgGene':
573550
case 'geneticAllianceIDs': // #

0 commit comments

Comments
 (0)