@@ -39,11 +39,15 @@ DEALINGS IN THE SOFTWARE. */
39
39
#include "htslib/thread_pool.h"
40
40
#include "bcf_sr_sort.h"
41
41
42
- #define MAX_CSI_COOR 0x7fffffff // maximum indexable coordinate of .csi
42
+ // Maximum indexable coordinate of .csi, for default min_shift of 14.
43
+ // This comes out to about 17 Tbp. Limiting factor is the bin number,
44
+ // which is a uint32_t in CSI. The highest number of levels compatible
45
+ // with this is 10 (needs 31 bits).
46
+ #define MAX_CSI_COOR ((1LL << (14 + 30)) - 1)
43
47
44
48
typedef struct
45
49
{
46
- uint32_t start , end ;
50
+ hts_pos_t start , end ;
47
51
}
48
52
region1_t ;
49
53
@@ -61,7 +65,7 @@ typedef struct
61
65
}
62
66
aux_t ;
63
67
64
- static int _regions_add (bcf_sr_regions_t * reg , const char * chr , int start , int end );
68
+ static int _regions_add (bcf_sr_regions_t * reg , const char * chr , hts_pos_t start , hts_pos_t end );
65
69
static bcf_sr_regions_t * _regions_init_string (const char * str );
66
70
static int _regions_match_alleles (bcf_sr_regions_t * reg , int als_idx , bcf1_t * rec );
67
71
@@ -420,11 +424,11 @@ static inline int has_filter(bcf_sr_t *reader, bcf1_t *line)
420
424
return 0 ;
421
425
}
422
426
423
- static int _reader_seek (bcf_sr_t * reader , const char * seq , int start , int end )
427
+ static int _reader_seek (bcf_sr_t * reader , const char * seq , hts_pos_t start , hts_pos_t end )
424
428
{
425
429
if ( end >=MAX_CSI_COOR )
426
430
{
427
- hts_log_error ("The coordinate is out of csi index limit: %d" , end + 1 );
431
+ hts_log_error ("The coordinate is out of csi index limit: %" PRIhts_pos , end + 1 );
428
432
exit (1 );
429
433
}
430
434
if ( reader -> itr )
@@ -446,7 +450,7 @@ static int _reader_seek(bcf_sr_t *reader, const char *seq, int start, int end)
446
450
reader -> itr = bcf_itr_queryi (reader -> bcf_idx ,tid ,start ,end + 1 );
447
451
}
448
452
if (!reader -> itr ) {
449
- hts_log_error ("Could not seek: %s:%d-%d" , seq , start + 1 , end + 1 );
453
+ hts_log_error ("Could not seek: %s:%" PRIhts_pos "-%" PRIhts_pos , seq , start + 1 , end + 1 );
450
454
assert (0 );
451
455
}
452
456
return 0 ;
@@ -581,7 +585,8 @@ static void _reader_shift_buffer(bcf_sr_t *reader)
581
585
582
586
static int next_line (bcf_srs_t * files )
583
587
{
584
- int i , min_pos = INT_MAX ;
588
+ int i ;
589
+ hts_pos_t min_pos = HTS_POS_MAX ;
585
590
const char * chr = NULL ;
586
591
587
592
// Loop until next suitable line is found or all readers have finished
@@ -606,7 +611,7 @@ static int next_line(bcf_srs_t *files)
606
611
else if ( min_pos == files -> readers [i ].buffer [1 ]-> pos )
607
612
bcf_sr_sort_add_active (& BCF_SR_AUX (files )-> sort , i );
608
613
}
609
- if ( min_pos == INT_MAX )
614
+ if ( min_pos == HTS_POS_MAX )
610
615
{
611
616
if ( !files -> regions ) break ;
612
617
continue ;
@@ -622,7 +627,7 @@ static int next_line(bcf_srs_t *files)
622
627
for (i = 0 ; i < files -> nreaders ; i ++ )
623
628
if ( files -> readers [i ].nbuffer && files -> readers [i ].buffer [1 ]-> pos == min_pos )
624
629
_reader_shift_buffer (& files -> readers [i ]);
625
- min_pos = INT_MAX ;
630
+ min_pos = HTS_POS_MAX ;
626
631
chr = NULL ;
627
632
continue ;
628
633
}
@@ -672,7 +677,7 @@ static void bcf_sr_seek_start(bcf_srs_t *readers)
672
677
}
673
678
674
679
675
- int bcf_sr_seek (bcf_srs_t * readers , const char * seq , int pos )
680
+ int bcf_sr_seek (bcf_srs_t * readers , const char * seq , hts_pos_t pos )
676
681
{
677
682
if ( !readers -> regions ) return 0 ;
678
683
bcf_sr_sort_reset (& BCF_SR_AUX (readers )-> sort );
@@ -767,7 +772,7 @@ int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file)
767
772
768
773
// Add a new region into a list sorted by start,end. On input the coordinates
769
774
// are 1-based, stored 0-based, inclusive.
770
- static int _regions_add (bcf_sr_regions_t * reg , const char * chr , int start , int end )
775
+ static int _regions_add (bcf_sr_regions_t * reg , const char * chr , hts_pos_t start , hts_pos_t end )
771
776
{
772
777
if ( start == -1 && end == -1 )
773
778
{
@@ -828,7 +833,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
828
833
829
834
kstring_t tmp = {0 ,0 ,0 };
830
835
const char * sp = str , * ep = str ;
831
- int from , to ;
836
+ hts_pos_t from , to ;
832
837
while ( 1 )
833
838
{
834
839
while ( * ep && * ep != ',' && * ep != ':' ) ep ++ ;
@@ -880,7 +885,7 @@ static bcf_sr_regions_t *_regions_init_string(const char *str)
880
885
881
886
// ichr,ifrom,ito are 0-based;
882
887
// returns -1 on error, 0 if the line is a comment line, 1 on success
883
- static int _regions_parse_line (char * line , int ichr ,int ifrom ,int ito , char * * chr ,char * * chr_end ,int * from ,int * to )
888
+ static int _regions_parse_line (char * line , int ichr , int ifrom , int ito , char * * chr , char * * chr_end , hts_pos_t * from , hts_pos_t * to )
884
889
{
885
890
if (ifrom < 0 || ito < 0 ) return -1 ;
886
891
* chr_end = NULL ;
@@ -970,7 +975,8 @@ bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int ichr
970
975
while ( hts_getline (reg -> file , KS_SEP_LINE , & reg -> line ) > 0 )
971
976
{
972
977
char * chr , * chr_end ;
973
- int from , to , ret ;
978
+ hts_pos_t from , to ;
979
+ int ret ;
974
980
ret = _regions_parse_line (reg -> line .s , ichr ,ifrom ,abs (ito ), & chr ,& chr_end ,& from ,& to );
975
981
if ( ret < 0 )
976
982
{
@@ -1077,7 +1083,8 @@ int bcf_sr_regions_next(bcf_sr_regions_t *reg)
1077
1083
1078
1084
// reading from tabix
1079
1085
char * chr , * chr_end ;
1080
- int ichr = 0 , ifrom = 1 , ito = 2 , is_bed = 0 , from , to ;
1086
+ int ichr = 0 , ifrom = 1 , ito = 2 , is_bed = 0 ;
1087
+ hts_pos_t from , to ;
1081
1088
if ( reg -> tbx )
1082
1089
{
1083
1090
ichr = reg -> tbx -> conf .sc - 1 ;
@@ -1196,7 +1203,7 @@ static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *re
1196
1203
return !(type & VCF_INDEL ) ? 1 : 0 ;
1197
1204
}
1198
1205
1199
- int bcf_sr_regions_overlap (bcf_sr_regions_t * reg , const char * seq , int start , int end )
1206
+ int bcf_sr_regions_overlap (bcf_sr_regions_t * reg , const char * seq , hts_pos_t start , hts_pos_t end )
1200
1207
{
1201
1208
int iseq ;
1202
1209
if ( khash_str2int_get (reg -> seq_hash , seq , & iseq )< 0 ) return -1 ; // no such sequence
0 commit comments