@@ -33,7 +33,7 @@ class OMIMParser extends Bio2RDFizer
33
33
function __construct ($ argv ) {
34
34
parent ::__construct ($ argv , 'omim ' );
35
35
parent ::addParameter ('files ' ,true ,null ,'all|omim# ' ,'entries to process: comma-separated list or hyphen-separated range ' );
36
- parent ::addParameter ('omim_api_url ' ,false ,null ,'http ://api.omim.org/api/entry?include=all&format=json ' );
36
+ parent ::addParameter ('omim_api_url ' ,false ,null ,'https ://api.omim.org/api/entry?include=all&format=json ' );
37
37
parent ::addParameter ('omim_api_key ' ,false ,null );
38
38
parent ::addParameter ('omim_api_key_file ' ,false ,null ,'omim.key ' ,'A file containing your omim KEY ' );
39
39
parent ::initialize ();
@@ -55,22 +55,38 @@ function Run()
55
55
}
56
56
} else {
57
57
trigger_error ("No OMIM key has been provided either by commmand line or in the expected omim key file $ key_file " ,E_USER_WARNING );
58
+ exit ;
58
59
}
59
60
}
60
61
61
- // get the list of mim2gene entries
62
- $ entries = $ this ->GetListOfEntries ($ ldir );
62
+ // get the list of entries
63
+ $ file = "mimTitles.txt " ;
64
+ $ rfile = "https://data.omim.org/downloads/ $ key/ $ file " ;
65
+ $ lfile = $ ldir .$ file ;
66
+ if (!file_exists ($ lfile ) && parent ::getParameterValue ('download ' ) == false ) {
67
+ trigger_error ($ lfile ." not found. Will attempt to download. " , E_USER_NOTICE );
68
+ parent ::setParameterValue ('download ' ,true );
69
+ }
70
+ if (parent ::getParameterValue ('download ' ) == true ) {
71
+ echo "downloading $ file ... " ;
72
+ Utils::DownloadSingle ($ rfile , $ lfile );
73
+ }
74
+ // parse the file
75
+ $ fp = fopen ($ lfile ,"rb " );
76
+ while ($ l = fgetcsv ($ fp ,0 ,"\t" )) {
77
+ if ($ l [0 ][0 ] == "# " ) continue ;
78
+ $ full_list [ $ l [1 ] ] = "" ;
79
+ }
63
80
81
+
64
82
// get the work specified
65
83
$ list = trim (parent ::getParameterValue ('files ' ));
66
84
if ($ list != 'all ' ) {
67
85
// check if a hyphenated list was provided
68
86
if (($ pos = strpos ($ list ,"- " )) !== FALSE ) {
69
87
$ start_range = substr ($ list ,0 ,$ pos );
70
88
$ end_range = substr ($ list ,$ pos +1 );
71
-
72
- // get the whole list
73
- $ full_list = $ this ->GetListOfEntries ($ ldir );
89
+
74
90
// now intersect
75
91
foreach ($ full_list AS $ e => $ type ) {
76
92
if ($ e >= $ start_range && $ e <= $ end_range ) {
@@ -84,9 +100,11 @@ function Run()
84
100
foreach ($ b AS $ e ) {
85
101
$ myentries [$ e ] = '' ;
86
102
}
87
- $ entries = array_intersect_key ($ entries ,$ myentries );
103
+ $ entries = array_intersect_key ($ full_list ,$ myentries );
88
104
}
89
- }
105
+ } else $ entries = $ full_list ;
106
+
107
+ echo "Will process a total of " .count ($ entries )." OMIM entries " .PHP_EOL ;
90
108
91
109
// set the write file
92
110
$ gz = (strstr (parent ::getParameterValue ('output_format ' ),".gz " ) === FALSE )?false :true ;
@@ -102,17 +120,13 @@ function Run()
102
120
$ total = count ($ entries );
103
121
foreach ($ entries AS $ omim_id => $ type ) {
104
122
echo "processing " .(++$ i )." of $ total - omim# " ;
105
- $ download_file = $ ldir .$ omim_id .".json.gz " ;
106
- $ gzfile = "compress.zlib:// $ download_file " ;
123
+ $ lfile = $ ldir .$ omim_id .".json.gz " ;
124
+ $ gzfile = "compress.zlib:// $ lfile " ;
107
125
// download if the file doesn't exist or we are told to
108
- if (!file_exists ($ download_file ) || parent ::getParameterValue ('download ' ) == true ) {
126
+ if (!file_exists ($ lfile ) || parent ::getParameterValue ('download ' ) == true ) {
109
127
// download using the api
110
- $ url = parent ::getParameterValue ('omim_api_url ' ).'&apiKey= ' .parent ::getParameterValue ('omim_api_key ' ).'&mimNumber= ' .$ omim_id ;
111
- $ buf = file_get_contents ($ url );
112
- if (strlen ($ buf ) != 0 ) {
113
- file_put_contents ($ download_file , $ buf );
114
- usleep (500000 ); // limit of 4 requests per second
115
- }
128
+ $ rfile = parent ::getParameterValue ('omim_api_url ' ).'&apiKey= ' .parent ::getParameterValue ('omim_api_key ' ).'&mimNumber= ' .$ omim_id ;
129
+ Utils::DownloadSingle ($ rfile , $ lfile );
116
130
}
117
131
118
132
// load entry, parse and write to file
@@ -170,57 +184,6 @@ function Run()
170
184
return TRUE ;
171
185
}
172
186
173
- function getListOfEntries ($ ldir )
174
- {
175
- // get the master list of entries
176
- $ file = "mim2gene.txt " ;
177
- if (!file_exists ($ ldir .$ file )) {
178
- trigger_error ($ ldir .$ file ." not found. Will attempt to download. " , E_USER_NOTICE );
179
- $ this ->SetParameterValue ('download ' ,true );
180
- }
181
-
182
- if (parent ::getParameterValue ('download ' )==true ) {
183
- // connect
184
- if (!isset ($ ftp )) {
185
- $ host = 'ftp.omim.org ' ;
186
- echo "connecting to $ host ... " ;
187
- $ ftp = ftp_connect ($ host );
188
- if (!$ ftp ) {
189
- echo "Unable to connect to $ host " .PHP_EOL ;
190
- die;
191
- }
192
- ftp_pasv ($ ftp , true ) ;
193
- $ login =
ftp_login (
$ ftp,
'anonymous ' ,
'[email protected] ' );
194
- if ((!$ ftp ) || (!$ login )) {
195
- echo "FTP-connect failed! " ; die;
196
- } else {
197
- echo "Connected " .PHP_EOL ;
198
- }
199
- }
200
-
201
- // download
202
- ftp_pasv ($ ftp , true );
203
- echo "Downloading $ file ... " ;
204
- if (ftp_get ($ ftp , $ ldir .$ file , 'OMIM/ ' .$ file , FTP_BINARY ) === FALSE ) {
205
- trigger_error ("Error in downloading $ file " );
206
- }
207
- if (isset ($ ftp )) ftp_close ($ ftp );
208
- echo "success! " .PHP_EOL ;
209
- }
210
-
211
- // parse the mim2gene file for the entries
212
- // # Mim Number Type Gene IDs Approved Gene Symbols
213
- $ fp = fopen ($ ldir .$ file ,"r " );
214
- fgets ($ fp );
215
- while ($ l = fgets ($ fp )) {
216
- $ a = explode ("\t" ,$ l );
217
- if ($ a [1 ] != "moved/removed " )
218
- $ list [$ a [0 ]] = $ a [1 ];
219
- }
220
- fclose ($ fp );
221
- return $ list ;
222
- }
223
-
224
187
225
188
function get_phenotype_mapping_method_type ($ id = null , $ generate_declaration = false )
226
189
{
@@ -326,6 +289,14 @@ function ParseEntry($obj, $type)
326
289
}
327
290
}
328
291
292
+ // check if moved
293
+ if (isset ($ o ['movedTo ' ])) {
294
+ $ new_omim_uri = parent ::getNamespace ().$ o ['movedTo ' ];
295
+ parent ::addRDF (
296
+ parent ::triplify ($ omim_uri , parent ::getVoc ()."superceded-by " , $ new_omim_uri )
297
+ );
298
+ }
299
+
329
300
// parse text sections
330
301
if (isset ($ o ['textSectionList ' ])) {
331
302
foreach ($ o ['textSectionList ' ] AS $ i => $ section ) {
@@ -542,7 +513,8 @@ function ParseEntry($obj, $type)
542
513
543
514
$ ns = '' ;
544
515
switch ($ k ) {
545
- case 'approvedGeneSymbols ' : $ ns = 'symbol ' ;break ;
516
+ case 'hgncID ' : $ ns = 'hgnc ' ;break ;
517
+ case 'approvedGeneSymbols ' : $ ns = 'hgnc.symbol ' ;break ;
546
518
case 'geneIDs ' : $ ns = 'ncbigene ' ;break ;
547
519
case 'ncbiReferenceSequences ' : $ ns = 'gi ' ;break ;
548
520
case 'genbankNucleotideSequences ' : $ ns = 'gi ' ;break ;
@@ -568,6 +540,11 @@ function ParseEntry($obj, $type)
568
540
case 'diseaseOntologyIDs ' : $ ns = 'do ' ;break ;
569
541
570
542
// specifically ignoring
543
+ case 'newbornScreening ' :
544
+ case 'clinGenDosage ' :
545
+ case 'clinGenValidity ' :
546
+ case 'monarch ' :
547
+ case 'decipherSyndromes ' :
571
548
case 'geneTests ' :
572
549
case 'cmgGene ' :
573
550
case 'geneticAllianceIDs ' : // #
0 commit comments