@@ -33,7 +33,7 @@ class OMIMParser extends Bio2RDFizer
3333 function __construct ($ argv ) {
3434 parent ::__construct ($ argv , 'omim ' );
3535 parent ::addParameter ('files ' ,true ,null ,'all|omim# ' ,'entries to process: comma-separated list or hyphen-separated range ' );
36- parent ::addParameter ('omim_api_url ' ,false ,null ,'http ://api.omim.org/api/entry?include=all&format=json ' );
36+ parent ::addParameter ('omim_api_url ' ,false ,null ,'https ://api.omim.org/api/entry?include=all&format=json ' );
3737 parent ::addParameter ('omim_api_key ' ,false ,null );
3838 parent ::addParameter ('omim_api_key_file ' ,false ,null ,'omim.key ' ,'A file containing your omim KEY ' );
3939 parent ::initialize ();
@@ -55,22 +55,38 @@ function Run()
5555 }
5656 } else {
5757 trigger_error ("No OMIM key has been provided either by commmand line or in the expected omim key file $ key_file " ,E_USER_WARNING );
58+ exit ;
5859 }
5960 }
6061
61- // get the list of mim2gene entries
62- $ entries = $ this ->GetListOfEntries ($ ldir );
62+ // get the list of entries
63+ $ file = "mimTitles.txt " ;
64+ $ rfile = "https://data.omim.org/downloads/ $ key/ $ file " ;
65+ $ lfile = $ ldir .$ file ;
66+ if (!file_exists ($ lfile ) && parent ::getParameterValue ('download ' ) == false ) {
67+ trigger_error ($ lfile ." not found. Will attempt to download. " , E_USER_NOTICE );
68+ parent ::setParameterValue ('download ' ,true );
69+ }
70+ if (parent ::getParameterValue ('download ' ) == true ) {
71+ echo "downloading $ file ... " ;
72+ Utils::DownloadSingle ($ rfile , $ lfile );
73+ }
74+ // parse the file
75+ $ fp = fopen ($ lfile ,"rb " );
76+ while ($ l = fgetcsv ($ fp ,0 ,"\t" )) {
77+ if ($ l [0 ][0 ] == "# " ) continue ;
78+ $ full_list [ $ l [1 ] ] = "" ;
79+ }
6380
81+
6482 // get the work specified
6583 $ list = trim (parent ::getParameterValue ('files ' ));
6684 if ($ list != 'all ' ) {
6785 // check if a hyphenated list was provided
6886 if (($ pos = strpos ($ list ,"- " )) !== FALSE ) {
6987 $ start_range = substr ($ list ,0 ,$ pos );
7088 $ end_range = substr ($ list ,$ pos +1 );
71-
72- // get the whole list
73- $ full_list = $ this ->GetListOfEntries ($ ldir );
89+
7490 // now intersect
7591 foreach ($ full_list AS $ e => $ type ) {
7692 if ($ e >= $ start_range && $ e <= $ end_range ) {
@@ -84,9 +100,11 @@ function Run()
84100 foreach ($ b AS $ e ) {
85101 $ myentries [$ e ] = '' ;
86102 }
87- $ entries = array_intersect_key ($ entries ,$ myentries );
103+ $ entries = array_intersect_key ($ full_list ,$ myentries );
88104 }
89- }
105+ } else $ entries = $ full_list ;
106+
107+ echo "Will process a total of " .count ($ entries )." OMIM entries " .PHP_EOL ;
90108
91109 // set the write file
92110 $ gz = (strstr (parent ::getParameterValue ('output_format ' ),".gz " ) === FALSE )?false :true ;
@@ -102,17 +120,13 @@ function Run()
102120 $ total = count ($ entries );
103121 foreach ($ entries AS $ omim_id => $ type ) {
104122 echo "processing " .(++$ i )." of $ total - omim# " ;
105- $ download_file = $ ldir .$ omim_id .".json.gz " ;
106- $ gzfile = "compress.zlib:// $ download_file " ;
123+ $ lfile = $ ldir .$ omim_id .".json.gz " ;
124+ $ gzfile = "compress.zlib:// $ lfile " ;
107125 // download if the file doesn't exist or we are told to
108- if (!file_exists ($ download_file ) || parent ::getParameterValue ('download ' ) == true ) {
126+ if (!file_exists ($ lfile ) || parent ::getParameterValue ('download ' ) == true ) {
109127 // download using the api
110- $ url = parent ::getParameterValue ('omim_api_url ' ).'&apiKey= ' .parent ::getParameterValue ('omim_api_key ' ).'&mimNumber= ' .$ omim_id ;
111- $ buf = file_get_contents ($ url );
112- if (strlen ($ buf ) != 0 ) {
113- file_put_contents ($ download_file , $ buf );
114- usleep (500000 ); // limit of 4 requests per second
115- }
128+ $ rfile = parent ::getParameterValue ('omim_api_url ' ).'&apiKey= ' .parent ::getParameterValue ('omim_api_key ' ).'&mimNumber= ' .$ omim_id ;
129+ Utils::DownloadSingle ($ rfile , $ lfile );
116130 }
117131
118132 // load entry, parse and write to file
@@ -170,57 +184,6 @@ function Run()
170184 return TRUE ;
171185 }
172186
/**
 * Build the master list of OMIM entries from the mim2gene.txt index file.
 *
 * If the index is not present in $ldir, the 'download' parameter is switched
 * on; whenever 'download' is on, the file is (re)fetched from the OMIM FTP
 * server before being parsed.
 *
 * @param string $ldir local directory (prefix) that holds mim2gene.txt
 * @return array map of MIM number => entry type; rows whose type is
 *               "moved/removed" are left out. Empty when the index file
 *               cannot be opened.
 */
function getListOfEntries($ldir)
{
    // get the master list of entries
    $file  = "mim2gene.txt";
    $lfile = $ldir.$file;
    if (!file_exists($lfile)) {
        trigger_error($lfile." not found. Will attempt to download.", E_USER_NOTICE);
        $this->SetParameterValue('download', true);
    }

    if (parent::getParameterValue('download') == true) {
        // connect
        $host = 'ftp.omim.org';
        echo "connecting to $host ...";
        $ftp = ftp_connect($host);
        if (!$ftp) {
            echo "Unable to connect to $host".PHP_EOL;
            die;
        }
        if (!ftp_login($ftp, 'anonymous', '[email protected]')) {
            // release the control connection before bailing out
            ftp_close($ftp);
            echo "FTP-connect failed!"; die;
        }
        echo "Connected".PHP_EOL;

        // passive mode can only be enabled once the login has succeeded
        ftp_pasv($ftp, true);

        // download
        echo "Downloading $file ...";
        $downloaded = ftp_get($ftp, $lfile, 'OMIM/'.$file, FTP_BINARY);
        ftp_close($ftp);
        if ($downloaded === FALSE) {
            trigger_error("Error in downloading $file");
        } else {
            // only report success when the transfer actually worked
            echo "success!".PHP_EOL;
        }
    }

    // parse the mim2gene file for the entries
    // # Mim Number	Type	Gene IDs	Approved Gene Symbols
    $list = array();
    $fp = fopen($lfile, "r");
    if ($fp === FALSE) {
        trigger_error("Unable to open $lfile", E_USER_WARNING);
        return $list;
    }
    fgets($fp); // the first line is the column header
    while (($l = fgets($fp)) !== FALSE) {
        // strip the line ending so it cannot stick to the last column
        $l = rtrim($l, "\r\n");
        // skip blank lines and any further comment lines
        if ($l === '' || $l[0] == '#') continue;
        $a = explode("\t", $l);
        // a usable row needs at least the MIM number and the type
        if (count($a) < 2) continue;
        if ($a[1] != "moved/removed") {
            $list[$a[0]] = $a[1];
        }
    }
    fclose($fp);
    return $list;
}
223-
224187
225188 function get_phenotype_mapping_method_type ($ id = null , $ generate_declaration = false )
226189 {
@@ -326,6 +289,14 @@ function ParseEntry($obj, $type)
326289 }
327290 }
328291
292+ // check if moved
293+ if (isset ($ o ['movedTo ' ])) {
294+ $ new_omim_uri = parent ::getNamespace ().$ o ['movedTo ' ];
295+ parent ::addRDF (
296+ parent ::triplify ($ omim_uri , parent ::getVoc ()."superceded-by " , $ new_omim_uri )
297+ );
298+ }
299+
329300 // parse text sections
330301 if (isset ($ o ['textSectionList ' ])) {
331302 foreach ($ o ['textSectionList ' ] AS $ i => $ section ) {
@@ -542,7 +513,8 @@ function ParseEntry($obj, $type)
542513
543514 $ ns = '' ;
544515 switch ($ k ) {
545- case 'approvedGeneSymbols ' : $ ns = 'symbol ' ;break ;
516+ case 'hgncID ' : $ ns = 'hgnc ' ;break ;
517+ case 'approvedGeneSymbols ' : $ ns = 'hgnc.symbol ' ;break ;
546518 case 'geneIDs ' : $ ns = 'ncbigene ' ;break ;
547519 case 'ncbiReferenceSequences ' : $ ns = 'gi ' ;break ;
548520 case 'genbankNucleotideSequences ' : $ ns = 'gi ' ;break ;
@@ -568,6 +540,11 @@ function ParseEntry($obj, $type)
568540 case 'diseaseOntologyIDs ' : $ ns = 'do ' ;break ;
569541
570542 // specifically ignoring
543+ case 'newbornScreening ' :
544+ case 'clinGenDosage ' :
545+ case 'clinGenValidity ' :
546+ case 'monarch ' :
547+ case 'decipherSyndromes ' :
571548 case 'geneTests ' :
572549 case 'cmgGene ' :
573550 case 'geneticAllianceIDs ' : // #
0 commit comments