@@ -93,7 +93,7 @@ int tbx_parse1(const tbx_conf_t *conf, int len, char *line, tbx_intv_t *intv)
93
93
intv -> ss = line + b ; intv -> se = line + i ;
94
94
} else if (id == conf -> bc ) {
95
95
// here ->beg is 0-based.
96
- intv -> beg = intv -> end = strtol (line + b , & s , 0 );
96
+ intv -> beg = intv -> end = strtoll (line + b , & s , 0 );
97
97
if ( s == line + b ) return -1 ; // expected int
98
98
if (!(conf -> preset & TBX_UCSC )) -- intv -> beg ;
99
99
else ++ intv -> end ;
@@ -103,7 +103,7 @@ int tbx_parse1(const tbx_conf_t *conf, int len, char *line, tbx_intv_t *intv)
103
103
if ((conf -> preset & 0xffff ) == TBX_GENERIC ) {
104
104
if (id == conf -> ec )
105
105
{
106
- intv -> end = strtol (line + b , & s , 0 );
106
+ intv -> end = strtoll (line + b , & s , 0 );
107
107
if ( s == line + b ) return -1 ; // expected int
108
108
}
109
109
} else if ((conf -> preset & 0xffff ) == TBX_SAM ) {
@@ -131,7 +131,7 @@ int tbx_parse1(const tbx_conf_t *conf, int len, char *line, tbx_intv_t *intv)
131
131
s = strstr (line + b , ";END=" );
132
132
if (s ) s += 5 ;
133
133
}
134
- if (s ) intv -> end = strtol (s , & s , 0 );
134
+ if (s ) intv -> end = strtoll (s , & s , 0 );
135
135
line [i ] = c ;
136
136
}
137
137
}
@@ -220,6 +220,44 @@ static int tbx_set_meta(tbx_t *tbx)
220
220
return 0 ;
221
221
}
222
222
223
+ // Minimal effort parser to extract reference length out of VCF header line
224
+ // This is used only used to adjust the number of levels if necessary,
225
+ // so not a major problem if it doesn't always work.
226
+ static void adjust_max_ref_len_vcf (const char * str , int64_t * max_ref_len )
227
+ {
228
+ const char * ptr ;
229
+ int64_t len ;
230
+ if (strncmp (str , "##contig" , 8 ) != 0 ) return ;
231
+ ptr = strstr (str + 8 , "length" );
232
+ if (!ptr ) return ;
233
+ for (ptr += 6 ; * ptr == ' ' || * ptr == '=' ; ptr ++ ) {}
234
+ len = strtoll (ptr , NULL , 10 );
235
+ if (* max_ref_len < len ) * max_ref_len = len ;
236
+ }
237
+
238
+ // Same for sam files
239
+ static void adjust_max_ref_len_sam (const char * str , int64_t * max_ref_len )
240
+ {
241
+ const char * ptr ;
242
+ int64_t len ;
243
+ if (strncmp (str , "@SQ" , 3 ) != 0 ) return ;
244
+ ptr = strstr (str + 3 , "\tLN:" );
245
+ if (!ptr ) return ;
246
+ ptr += 4 ;
247
+ len = strtoll (ptr , NULL , 10 );
248
+ if (* max_ref_len < len ) * max_ref_len = len ;
249
+ }
250
+
251
+ // Adjusts number of levels if not big enough. This can happen for
252
+ // files with very large contigs.
253
+ static int adjust_n_lvls (int min_shift , int n_lvls , int64_t max_len )
254
+ {
255
+ int64_t s = 1LL << (min_shift + n_lvls * 3 );
256
+ max_len += 256 ;
257
+ for (; max_len > s ; ++ n_lvls , s <<= 3 ) {}
258
+ return n_lvls ;
259
+ }
260
+
223
261
tbx_t * tbx_index (BGZF * fp , int min_shift , const tbx_conf_t * conf )
224
262
{
225
263
tbx_t * tbx ;
@@ -228,6 +266,7 @@ tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf)
228
266
int64_t lineno = 0 ;
229
267
uint64_t last_off = 0 ;
230
268
tbx_intv_t intv ;
269
+ int64_t max_ref_len = 0 ;
231
270
232
271
str .s = 0 ; str .l = str .m = 0 ;
233
272
tbx = (tbx_t * )calloc (1 , sizeof (tbx_t ));
@@ -237,11 +276,23 @@ tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf)
237
276
else min_shift = 14 , n_lvls = 5 , fmt = HTS_FMT_TBI ;
238
277
while ((ret = bgzf_getline (fp , '\n' , & str )) >= 0 ) {
239
278
++ lineno ;
279
+ if (str .s [0 ] == tbx -> conf .meta_char && fmt == HTS_FMT_CSI ) {
280
+ switch (tbx -> conf .preset ) {
281
+ case TBX_SAM :
282
+ adjust_max_ref_len_sam (str .s , & max_ref_len ); break ;
283
+ case TBX_VCF :
284
+ adjust_max_ref_len_vcf (str .s , & max_ref_len ); break ;
285
+ default :
286
+ break ;
287
+ }
288
+ }
240
289
if (lineno <= tbx -> conf .line_skip || str .s [0 ] == tbx -> conf .meta_char ) {
241
290
last_off = bgzf_tell (fp );
242
291
continue ;
243
292
}
244
293
if (first == 0 ) {
294
+ if (fmt == HTS_FMT_CSI )
295
+ n_lvls = adjust_n_lvls (min_shift , n_lvls , max_ref_len );
245
296
tbx -> idx = hts_idx_init (0 , fmt , last_off , min_shift , n_lvls );
246
297
if (!tbx -> idx ) goto fail ;
247
298
first = 1 ;
0 commit comments