1
1
/*
2
- Copyright (C) 2017-2021 Genome Research Ltd.
2
+ Copyright (C) 2017-2021,2024 Genome Research Ltd.
3
3
4
4
Author: Petr Danecek <[email protected] >
5
5
32
32
#include "htslib/khash_str2int.h"
33
33
#include "htslib/kbitset.h"
34
34
35
+ // Variant types and pair-wise compatibility of their combinations, see bcf_sr_init_scores()
35
36
#define SR_REF 1
36
37
#define SR_SNP 2
37
38
#define SR_INDEL 4
@@ -366,7 +367,7 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
366
367
// group VCFs into groups, each with a unique combination of variants in the duplicate lines
367
368
int ireader ,ivar ,irec ,igrp ,ivset ,iact ;
368
369
for (ireader = 0 ; ireader < readers -> nreaders ; ireader ++ ) srt -> vcf_buf [ireader ].nrec = 0 ;
369
- for (iact = 0 ; iact < srt -> nactive ; iact ++ )
370
+ for (iact = 0 ; iact < srt -> nactive ; iact ++ ) // process each of the active readers, i.e. those which still have a record to process
370
371
{
371
372
ireader = srt -> active [iact ];
372
373
bcf_sr_t * reader = & readers -> readers [ireader ];
@@ -384,6 +385,11 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
384
385
srt -> off [srt -> noff ++ ] = srt -> str .l ;
385
386
size_t beg = srt -> str .l ;
386
387
int end_pos = -1 ;
388
+ if ( srt -> pair & BCF_SR_PAIR_ID )
389
+ {
390
+ kputs (line -> d .id ,& srt -> str );
391
+ kputc (':' ,& srt -> str );
392
+ }
387
393
for (ivar = 1 ; ivar < line -> n_allele ; ivar ++ )
388
394
{
389
395
if ( ivar > 1 ) kputc (',' ,& srt -> str );
@@ -417,7 +423,10 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
417
423
}
418
424
419
425
// Create new variant or attach to existing one. But careful, there can be duplicate
420
- // records with the same POS,REF,ALT (e.g. in dbSNP-b142)
426
+ // records with the same POS,REF,ALT (e.g. in dbSNP-b142). In such a case, use a
427
+ // hash table (srt->var_str2int) and a counter (var_idx) to ensure they are
428
+ // treated as separate variants, while still allowing them to be matched
429
+ // between readers.
421
430
char * var_str = beg + srt -> str .s ;
422
431
int ret , var_idx = 0 , var_end = srt -> str .l ;
423
432
while ( 1 )
@@ -435,6 +444,7 @@ static int bcf_sr_sort_set(bcf_srs_t *readers, sr_sort_t *srt, const char *chr,
435
444
}
436
445
if ( ret == -1 )
437
446
{
447
+ // the variant is not present, insert
438
448
ivar = srt -> nvar ++ ;
439
449
hts_expand0 (var_t ,srt -> nvar ,srt -> mvar ,srt -> var );
440
450
srt -> var [ivar ].nvcf = 0 ;
0 commit comments