diff --git a/INSTALL b/INSTALL
index e0fddd9d7..eb5048a24 100644
--- a/INSTALL
+++ b/INSTALL
@@ -38,6 +38,7 @@ a development ('-dev' or '-devel') package separate from the main library.
     liblzma    (required, unless configured with --disable-lzma)
     libcurl    (optional, but strongly recommended)
     libcrypto  (optional for Amazon S3 support; not needed on MacOS)
+    libdeflate (optional, but strongly recommended for faster gzip)
 
 Disabling libbzip2 and liblzma will make some CRAM files unreadable, so
 is not recommended.
@@ -252,14 +253,14 @@ Debian / Ubuntu
 ---------------
 
 sudo apt-get update  # Ensure the package list is up to date
-sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev
+sudo apt-get install autoconf automake make gcc perl zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev libssl-dev libdeflate-dev
 
 Note: libcurl4-openssl-dev can be used as an alternative to libcurl4-gnutls-dev.
 
 RedHat / CentOS
 ---------------
 
-sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel
+sudo yum install autoconf automake make gcc perl-Data-Dumper zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel libdeflate-devel
 
 Note: On some versions perl FindBin will need to be installed to make the tests work.
 
@@ -271,6 +272,9 @@ Alpine Linux
 doas apk update  # Ensure the package list is up to date
 doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev
 
+Ideally also install a copy of libdeflate-dev for faster (de)compression.
+This can be found in the Alpine community repository.
+
 Note: some older Alpine versions use libressl-dev rather than openssl-dev.
 
 OpenSUSE
@@ -278,6 +282,9 @@ OpenSUSE
 
 sudo zypper install autoconf automake make gcc perl zlib-devel libbz2-devel xz-devel libcurl-devel libopenssl-devel
 
+Also install libdeflate-devel, available on OpenSUSE Leap 15.4 onwards
+or directly via git releases above.
+
 Windows MSYS2/MINGW64
 ---------------------
 
diff --git a/Makefile b/Makefile
index c5aa217c2..99142c865 100644
--- a/Makefile
+++ b/Makefile
@@ -150,8 +150,8 @@ LIBHTS_SOVERSION = 3
 # is not strictly necessary and should be removed the next time
 # LIBHTS_SOVERSION is bumped (see #1144 and
 # https://developer.apple.com/library/archive/documentation/DeveloperTools/Conceptual/DynamicLibraries/100-Articles/DynamicLibraryDesignGuidelines.html#//apple_ref/doc/uid/TP40002013-SW23)
-MACH_O_COMPATIBILITY_VERSION = 3.1.19
-MACH_O_CURRENT_VERSION = 3.1.19
+MACH_O_COMPATIBILITY_VERSION = 3.1.20
+MACH_O_CURRENT_VERSION = 3.1.20
 
 # Force version.h to be remade if $(PACKAGE_VERSION) has changed.
 version.h: $(if $(wildcard version.h),$(if $(findstring "$(PACKAGE_VERSION)",$(shell cat version.h)),,force))
diff --git a/NEWS b/NEWS
index e9de8b916..83dcaa5b9 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,91 @@
+Noteworthy changes in release 1.20 (15th April 2024)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Updates
+-------
+
+* When working on named files, bgzip now sets the modified and access times
+  of the output files it makes to match those of the corresponding input.
+  (PR #1727, feature request #1718.  Requested by Gert Hulselmans)
+
+* It's now possible to use a -o option to specify the output file name in
+  bgzip.
+  (PR #1747, feature request #1726.  Requested by Gert Hulselmans)
+
+* Improved error faidx error messages.
+  (PR #1743, thanks to Nick Moore)
+
+* Faster reading of SAM array (type "B") tags.  These often turn up
+  in ONT and PacBio data.
+  (PR #1741)
+
+* Improved validity checking of base modification tags.
+  (PR #1749)
+
+* mpileup overlap removal now works where one read has a deletion.
+  (PR #1751, fixes samtools/samtools#1992.  Reported by Long Tian)
+
+* The S3 plugin can now find buckets via S3 access point aliases.
+  (PR #1756, thanks to Matt Pawelczyk;
+   fixes samtools/samtools#1984.  Reported by Albert Li)
+
+* Added a --threads option (and -@ short option) to tabix.
+  (PR #1755, feature request #1735.  Requested by Dan Bolser)
+
+* tabix can now index Graph Alignment Format (GAF) files.
+  (See https://github.com/lh3/gfatools/blob/master/doc/rGFA.md)
+  (PR #1763, thanks to Adam Novak)
+
+Bug fixes
+---------
+
+* Security fix: Prevent possible heap overflow in cram_encode_aux() on
+  bad RG:Z tags.
+  (PR #1737)
+
+* Security fix: Prevent attempts to call a NULL pointer if certain URL
+  schemes are used in CRAM @SQ UR: tags.
+  (PR #1757)
+
+* Security fix: Fixed a bug where following certain AWS S3 redirects could
+  downgrade the connection from TLS (i.e. https://) to unencrypted http://.
+  This could happen when using path-based URLs and AWS_DEFAULT_REGION
+  was set to a region other that the one where the data was stored.
+  (PR #1762, fixes #1760. Reported by andaca)
+
+* Fixed arithmetic overflow when loading very long references for CRAM.
+  (PR #1738, fixes #1738.  Reported by Shane McCarthy)
+
+* Fixed faidx and CRAM reference look-ups on compressed	fasta where the .fai
+  index file was present, but the .gzi index of compressed offsets was not.
+  (PR #1745, fixes #1744.  Reported by Theodore Li)
+
+* Fixed BCF indexing on-the-fly bug which produced invalid indexes when
+  using multiple compression threads.
+  (PR #1742, fixes #1740.  Reported by graphenn)
+
+* Ensure that pileup destructors are called by bam_plp_destroy(), to
+  prevent memory leaks.
+  (PR #1749, PR #1754)
+
+* Ensure on-the-fly index timestamps are always older than the data file.
+  Previously the files could be closed out of order, leading to warnings
+  being printed when using the index.
+  (PR #1753, fixes #1732.  Reported by Gert Hulselmans)
+
+* To prevent data corruption when reading (strictly invalid) VCF files
+  with duplicated FORMAT tags, all but the first copy of the data
+  associated with the tag are now dropped with a warning.
+  (PR #1752, PR #1761, fixes #1733.  Reported by anthakki)
+
+* Fixed a bug introduced in release 1.19 (PR #1689) which broke variant
+  record data if it tried to remove an over-long tag.
+  (PR #1752, PR #1761)
+
+* Changed error to warning when complaining about use of the CG tag
+  in SAM or CRAM files.
+  (PR #1758, fixes samtools/samtools#2002)
+
 Noteworthy changes in release 1.19.1 (22nd January 2024)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -33,6 +121,7 @@ Updates
 
   In future work, the library will instead convert such tags into their
   local alternatives (see https://github.com/samtools/hts-specs/pull/434).
+  (PR #1689)
 
 * New program. Adds annot-tsv which annotates regions in a destination file with
   texts from overlapping regions in a source file.
diff --git a/annot-tsv.1 b/annot-tsv.1
index 22191da9f..df3b06e91 100644
--- a/annot-tsv.1
+++ b/annot-tsv.1
@@ -1,5 +1,5 @@
 '\" t
-.TH annot-tsv 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools"
+.TH annot-tsv 1 "15 April 2024" "htslib-1.20" "Bioinformatics tools"
 .\"
 .\" Copyright (C) 2015, 2017-2018, 2023 Genome Research Ltd.
 .\"
diff --git a/bgzf.c b/bgzf.c
index 45fbd3db2..8092c7b9a 100644
--- a/bgzf.c
+++ b/bgzf.c
@@ -2,7 +2,7 @@
 
    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
                  2011, 2012 Attractive Chaos <attractor@live.co.uk>
-   Copyright (C) 2009, 2013-2022 Genome Research Ltd
+   Copyright (C) 2009, 2013-2023 Genome Research Ltd
 
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
@@ -1959,6 +1959,11 @@ int bgzf_flush(BGZF *fp)
         return ret;
     }
 #endif
+
+    if (!fp->is_compressed) {
+        return hflush(fp->fp);
+    }
+
     while (fp->block_offset > 0) {
         int block_length;
         if ( fp->idx_build_otf )
diff --git a/bgzip.1 b/bgzip.1
index 3be9a5bfb..fe4225b43 100644
--- a/bgzip.1
+++ b/bgzip.1
@@ -1,10 +1,10 @@
-.TH bgzip 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools"
+.TH bgzip 1 "15 April 2024" "htslib-1.20" "Bioinformatics tools"
 .SH NAME
 .PP
 bgzip \- Block compression/decompression utility
 .\"
 .\" Copyright (C) 2009-2011 Broad Institute.
-.\" Copyright (C) 2018, 2021-2023 Genome Research Limited.
+.\" Copyright (C) 2018, 2021-2024 Genome Research Limited.
 .\"
 .\" Author: Heng Li <lh3@sanger.ac.uk>
 .\"
@@ -50,6 +50,8 @@ bgzip \- Block compression/decompression utility
 .IR index_name ]
 .RB [ -l
 .IR compression_level ]
+.RB [ -o
+.IR outfile ]
 .RB [ -s
 .IR size ]
 .RB [ -@
@@ -71,7 +73,9 @@ otherwise when compressing bgzip will write to a new file with a .gz
 suffix and remove the original.  When decompressing the input file must
 have a .gz suffix, which will be removed to make the output name.  Again
 after decompression completes the input file will be removed. When multiple
-files are given as input, the operation is performed on all of them.
+files are given as input, the operation is performed on all of them. Access
+and modification time of input file from filesystem is set to output file. 
+Note, access time may get updated by system when it deems appropriate.
 
 .SH OPTIONS
 .TP 10
@@ -128,6 +132,10 @@ Do not delete input file during operation.
 Compression level to use when compressing.
 From 0 to 9, or -1 for the default level set by the compression library. [-1]
 .TP
+.BI "-o, --output " FILE
+Write to a file, keep original files unchanged, will overwrite an existing
+file.
+.TP
 .B "-r, --reindex"
 Rebuild the index on an existing compressed file.
 .TP
@@ -136,7 +144,7 @@ Decompress INT bytes (uncompressed size) to standard output.
 Implies -c.
 .TP
 .B "-t, --test"
-Test the intregrity of the compressed file.
+Test the integrity of the compressed file.
 .TP
 .BI "-@, --threads " INT
 Number of threads to use [1].
diff --git a/bgzip.c b/bgzip.c
index e7caa7833..129343fb5 100644
--- a/bgzip.c
+++ b/bgzip.c
@@ -1,7 +1,7 @@
 /* bgzip.c -- Block compression/decompression utility.
 
    Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology
-   Copyright (C) 2010, 2013-2019, 2021-2023 Genome Research Ltd.
+   Copyright (C) 2010, 2013-2019, 2021-2024 Genome Research Ltd.
 
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
@@ -34,6 +34,8 @@
 #include <stdarg.h>
 #include <getopt.h>
 #include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/time.h>
 #include "htslib/bgzf.h"
 #include "htslib/hts.h"
 #include "htslib/hfile.h"
@@ -41,6 +43,7 @@
 #ifdef _WIN32
 #  define WIN32_LEAN_AND_MEAN
 #  include <windows.h>
+#  include <sys/utime.h>
 #endif
 
 static const int WINDOW_SIZE = BGZF_BLOCK_SIZE;
@@ -103,6 +106,89 @@ static int confirm_filename(int *is_forced, const char *name, const char *ext)
     return ask_yn();
 }
 
+/* getfilespec - get file status data
+   path        - file path for which status to be retrieved
+   status      - pointer to status structure in which the data to be stored
+   returns 0 on success and -1 on failure
+*/
+static int getfilespec(const char *path, struct stat *status)
+{
+    if (!path || !status) {     //invalid
+        return -1;
+    }
+    if (!strcmp(path, "-")) {   //cant get / set for stdin/out, return success
+        return 0;
+    }
+    if (stat(path, status) < 0) {
+        return -1;
+    }
+    return 0;
+}
+
+/* setfilespec - set file status data
+   path        - file path for which status to be set
+   status      - pointer to status structure in which the data is present
+   returns 0 on success and -1 on failure
+   sets only the time as of now.
+*/
+static int setfilespec(const char *path, const struct stat *status)
+{
+    if (!path || !status) {     //invalid
+        return -1;
+    }
+    if (!strcmp(path, "-")) {   //cant get / set for stdin/out, return success
+        return 0;
+    }
+
+#ifdef _WIN32
+    struct _utimbuf tval;
+    //time upto sec - access & modification time
+    tval.actime = status->st_atime;
+    tval.modtime = status->st_mtime;
+    if (_utime(path, &tval) < 0) {
+        fprintf(stderr, "[bgzip] Failed to set file specifications.\n");
+        return -1;
+    }
+#else
+    struct timeval tval[2];
+    memset(&tval[0], 0, sizeof(tval));
+    //time upto sec - access time
+    tval[0].tv_sec = status->st_atime;
+    //time upto sec - modification time
+    tval[1].tv_sec = status->st_mtime;
+    if (utimes(path, &tval[0]) < 0) {
+        fprintf(stderr, "[bgzip] Failed to set file specifications.\n");
+        return -1;
+    }
+#endif //_WIN32
+    return 0;
+}
+
+
+static int check_name_and_extension(char *name, int *forced) {
+    size_t pos;
+    char *ext;
+
+    for (pos = strlen(name); pos > 0; --pos)
+        if (name[pos] == '.' || name[pos] == '/') break;
+
+    if (pos == 0 || name[pos] != '.') {
+        fprintf(stderr, "[bgzip] can't find an extension in %s -- please rename\n", name);
+        return 1;
+    }
+
+    name[pos] = '\0';
+    ext = &name[pos+1];
+
+    if (!(known_extension(ext) || confirm_filename(forced, name, ext))) {
+        fprintf(stderr, "[bgzip] unknown extension .%s -- declining to decompress to %s\n", ext, name);
+        return 2;                            //explicit N, continue and return 2
+    }
+
+    return 0;
+}
+
+
 static int bgzip_main_usage(FILE *fp, int status)
 {
     fprintf(fp, "\n");
@@ -119,6 +205,7 @@ static int bgzip_main_usage(FILE *fp, int status)
     fprintf(fp, "   -I, --index-name FILE      name of BGZF index file [file.gz.gzi]\n");
     fprintf(fp, "   -k, --keep                 don't delete input files during operation\n");
     fprintf(fp, "   -l, --compress-level INT   Compression level to use when compressing; 0 to 9, or -1 for default [-1]\n");
+    fprintf(fp, "   -o, --output FILE          write to file, keep original files unchanged\n");
     fprintf(fp, "   -r, --reindex              (re)index compressed file\n");
     fprintf(fp, "   -s, --size INT             decompress INT bytes (uncompressed size)\n");
     fprintf(fp, "   -t, --test                 test integrity of compressed file\n");
@@ -133,8 +220,10 @@ int main(int argc, char **argv)
     BGZF *fp;
     char *buffer;
     long start, end, size;
-    char *index_fname = NULL;
-    int threads = 1, isstdin = 0, usedstdout = 0, ret = 0;
+    struct stat filestat;
+    char *statfilename = NULL;
+    char *index_fname = NULL, *write_fname = NULL;
+    int threads = 1, isstdin = 0, usedstdout = 0, ret = 0, exp_out_open = 0, f_dst = -1;
 
     static const struct option loptions[] =
     {
@@ -154,11 +243,12 @@ int main(int argc, char **argv)
         {"version", no_argument, NULL, 1},
         {"keep", no_argument, NULL, 'k'},
         {"binary", no_argument, NULL, 2},
+        {"output", required_argument, NULL, 'o'},
         {NULL, 0, NULL, 0}
     };
 
     compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; test = 0; keep = 0; binary = 0;
-    while((c  = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grtk",loptions,NULL)) >= 0){
+    while((c  = getopt_long(argc, argv, "cdh?fb:@:s:iI:l:grtko:",loptions,NULL)) >= 0){
         switch(c){
         case 'd': compress = 0; break;
         case 'c': pstdout = 1; break;
@@ -173,6 +263,7 @@ int main(int argc, char **argv)
         case '@': threads = atoi(optarg); break;
         case 't': test = 1; compress = 0; reindex = 0; break;
         case 'k': keep = 1; break;
+        case 'o': write_fname = optarg; break;
         case 1:
             printf(
 "bgzip (htslib) %s\n"
@@ -201,16 +292,32 @@ int main(int argc, char **argv)
     }
     /* avoid -I / indexfile with multiple inputs while index/reindex. these wont be set during
     read/decompress and are not considered even if set */
-    if ( (index || reindex) && index_fname && argc - optind > 1) {
+    if ( (index || reindex) && !write_fname && index_fname && argc - optind > 1) {
         fprintf(stderr, "[bgzip] Cannot specify index filename with multiple data file on index, reindex.\n");
         return 1;
     }
 
+    if (write_fname) {
+        if (pstdout) {
+            fprintf(stderr, "[bgzip] Cannot write to %s and stdout at the same time.\n", write_fname);
+            return 1;
+        } else if (strncmp(write_fname, "-", strlen(write_fname)) == 0) {
+            // stdout has special handling so treat as -c
+            pstdout = 1;
+            write_fname = NULL;
+        }
+    }
+
     do {
         isstdin = optind >= argc ? 1 : !strcmp("-", argv[optind]);          //using stdin or not?
-        /*stdout is in use when explicitly selected or when stdin in is in use, it need to be closed
+        /* when a named output file is not used, stdout is in use when explicitly
+        selected or when stdin in is in use, it needs to be closed
         explicitly to get all io errors*/
-        usedstdout |= isstdin || pstdout || test;
+
+        if (!write_fname)
+            usedstdout |= isstdin || pstdout || test;
+
+        statfilename = NULL;
 
         if (compress == 1) {
             hFILE* f_src = NULL;
@@ -230,7 +337,16 @@ int main(int argc, char **argv)
                 return 1;
             }
 
-            if ( argc>optind && !isstdin )                  //named input file that isn't an explicit "-"
+            if (write_fname) {
+                if (!exp_out_open) {  // only open this file once for writing, close at the end
+                    if ((fp = bgzf_open(write_fname, out_mode)) == NULL) {
+                        fprintf(stderr, "[bgzip] can't create %s: %s\n", write_fname, strerror(errno));
+                        return 1;
+                    } else {
+                        exp_out_open = 1;
+                    }
+                }
+            } else if ( argc>optind && !isstdin )            //named input file that isn't an explicit "-"
             {
                 if (pstdout)
                     fp = bgzf_open("-", out_mode);
@@ -257,7 +373,7 @@ int main(int argc, char **argv)
                         free(name);
                         return 1;
                     }
-                    free(name);
+                    statfilename = name;
                 }
             }
             else if (!pstdout && isatty(fileno((FILE *)stdout)) )
@@ -275,8 +391,12 @@ int main(int argc, char **argv)
                 bgzf_mt(fp, threads, 256);
 
             buffer = malloc(WINDOW_SIZE);
-            if (!buffer)
+            if (!buffer) {
+                if (statfilename) {
+                    free(statfilename);
+                }
                 return 1;
+            }
             if (rebgzip){
                 if ( bgzf_index_load(fp, index_fname, NULL) < 0 ) error("Could not load index: %s.%s\n", !isstdin ? argv[optind] : index_fname, !isstdin ? "gzi" : "");
 
@@ -360,8 +480,12 @@ int main(int argc, char **argv)
                             error("Could not write %d bytes: Error %d\n",
                                 n, fp->errcode);
                         if (flush)
-                            if (bgzf_flush_try(fp, 65536) < 0) // force
+                            if (bgzf_flush_try(fp, 65536) < 0) {// force
+                                if (statfilename) {
+                                    free(statfilename);
+                                }
                                 return -1;
+                            }
 
                         memmove(buffer, buffer+n, c2-n);
                         n = c2-n;
@@ -373,7 +497,7 @@ int main(int argc, char **argv)
                             n, fp->errcode);
                 }
             }
-            if ( index )
+            if ( index && !write_fname )
             {
                 if (index_fname) {
                     if (bgzf_index_dump(fp, index_fname, NULL) < 0)
@@ -387,11 +511,31 @@ int main(int argc, char **argv)
                     error("Can not write index for stdin data without index filename, use -I option to set index file.\n");
                 }
             }
-            if (bgzf_close(fp) < 0)
-                error("Output close failed: Error %d\n", fp->errcode);
+
+            if (!write_fname) {
+                if (bgzf_close(fp) < 0)
+                    error("Output close failed: Error %d\n", fp->errcode);
+            }
+
             if (hclose(f_src) < 0)
                 error("Input close failed\n");
-            if (argc > optind && !pstdout && !keep && !isstdin) unlink(argv[optind]);
+
+            if (statfilename) {
+                //get input file timestamp
+                if (!getfilespec(argv[optind], &filestat)) {
+                    //set output file timestamp
+                    if (setfilespec(statfilename, &filestat) < 0) {
+                        fprintf(stderr, "[bgzip] Failed to set file specification.\n");
+                    }
+                }
+                else {
+                    fprintf(stderr, "[bgzip] Failed to get file specification.\n");
+                }
+                free(statfilename);
+            }
+
+            if (argc > optind && !pstdout && !keep && !isstdin && !write_fname) unlink(argv[optind]);
+
             free(buffer);
         }
         else if ( reindex )
@@ -431,7 +575,7 @@ int main(int argc, char **argv)
         }
         else
         {
-            int f_dst, is_forced_tmp = is_forced;
+            int is_forced_tmp = is_forced;
 
             if ( argc>optind && !isstdin )
             {
@@ -448,46 +592,55 @@ int main(int argc, char **argv)
 
                 if (pstdout || test) {
                     f_dst = fileno(stdout);
-                }
-                else {
+                } else {
                     const int wrflags = O_WRONLY | O_CREAT | O_TRUNC;
-                    char *name = argv[optind], *ext;
-                    size_t pos;
-                    for (pos = strlen(name); pos > 0; --pos)
-                        if (name[pos] == '.' || name[pos] == '/') break;
-                    if (pos == 0 || name[pos] != '.') {
-                        fprintf(stderr, "[bgzip] can't remove an extension from %s -- please rename\n", argv[optind]);
+                    char *name;
+                    int check;
+
+                    if (!(name = strdup(argv[optind]))) {
+                        fprintf(stderr, "[bgzip] unable to allocate memory for output file name.\n");
                         bgzf_close(fp);
                         return 1;
                     }
-                    name = strdup(argv[optind]);
-                    name[pos] = '\0';
-                    ext = &name[pos+1];
-                    if (! (known_extension(ext) || confirm_filename(&is_forced_tmp, name, ext))) {
-                        fprintf(stderr, "[bgzip] unknown extension .%s -- declining to decompress to %s\n", ext, name);
+
+                    if ((check = check_name_and_extension(name, &is_forced_tmp))) {
                         bgzf_close(fp);
-                        free(name);
-                        ret = 2;                            //explicit N, continue and return 2
-                        continue;
+
+                        if (check == 1) {
+                            return 1;
+                        } else {
+                            ret = 2;
+                            continue;
+                        }
                     }
-                    f_dst = open(name, is_forced_tmp? wrflags : wrflags|O_EXCL, 0666);
-                    if (f_dst < 0 && errno == EEXIST) {
-                        if (confirm_overwrite(name)) {
-                            f_dst = open(name, wrflags, 0666);
+
+                    if (!exp_out_open) {
+                        if (write_fname) { // only open file once and don't care about overwriting
+                            is_forced_tmp = 1;
+                            exp_out_open = 1;
                         }
-                        else {
-                            ret = 2;                        //explicit N - no overwrite, continue and return 2
+
+                        f_dst = open(write_fname ? write_fname : name, is_forced_tmp? wrflags : wrflags|O_EXCL, 0666);
+
+                        if (f_dst < 0 && errno == EEXIST) {
+                            if (confirm_overwrite(name)) {
+                                f_dst = open(name, wrflags, 0666);
+                            }
+                            else {
+                                ret = 2;                        //explicit N - no overwrite, continue and return 2
+                                bgzf_close(fp);
+                                free(name);
+                                continue;
+                            }
+                        }
+                        if (f_dst < 0) {
+                            fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
                             free(name);
-                            bgzf_close(fp);
-                            continue;
+                            return 1;
                         }
                     }
-                    if (f_dst < 0) {
-                        fprintf(stderr, "[bgzip] can't create %s: %s\n", name, strerror(errno));
-                        free(name);
-                        return 1;
-                    }
-                    free(name);
+
+                    statfilename = name;
                 }
             }
             else if (!pstdout && isatty(fileno((FILE *)stdin)) )
@@ -505,6 +658,21 @@ int main(int argc, char **argv)
                     bgzf_close(fp);
                     return 1;
                 }
+
+                if (!write_fname) {
+                    f_dst = fileno(stdout);
+                } else {
+                    if (!exp_out_open) {
+                        exp_out_open = 1;
+
+                        f_dst = open(write_fname, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+
+                        if (f_dst < 0) {
+                            fprintf(stderr, "[bgzip] can't create %s: %s\n", write_fname, strerror(errno));
+                            return 1;
+                        }
+                    }
+                }
             }
 
             buffer = malloc(WINDOW_SIZE);
@@ -549,8 +717,26 @@ int main(int argc, char **argv)
             end = end_reg;
             free(buffer);
             if (bgzf_close(fp) < 0) error("Close failed: Error %d\n",fp->errcode);
-            if (argc > optind && !pstdout && !test && !keep && !isstdin) unlink(argv[optind]);
-            if (!isstdin && !pstdout && !test) {
+
+            if (statfilename) {
+                if (!write_fname) {
+                    //get input file timestamp
+                    if (!getfilespec(argv[optind], &filestat)) {
+                        //set output file timestamp
+                        if (setfilespec(statfilename, &filestat) < 0) {
+                            fprintf(stderr, "[bgzip] Failed to set file specification.\n");
+                        }
+                    }
+                    else {
+                        fprintf(stderr, "[bgzip] Failed to get file specification.\n");
+                    }
+                }
+
+                free(statfilename);
+            }
+
+            if (argc > optind && !pstdout && !test && !keep && !isstdin && !write_fname) unlink(argv[optind]);
+            if (!isstdin && !pstdout && !test && !write_fname) {
                 close(f_dst);                               //close output file when it is not stdout
             }
         }
@@ -562,6 +748,25 @@ int main(int argc, char **argv)
             fprintf(stderr, "[bgzip] Failed to close stdout, errno %d", errno);
             ret = 1;
         }
+    } else if (write_fname) {
+        if (compress == 1) { // close explicit output file (this is for compression)
+            if (index) {
+                if (index_fname) {
+                    if (bgzf_index_dump(fp, index_fname, NULL) < 0)
+                        error("Could not write index to '%s'\n", index_fname);
+                } else {
+                    if (bgzf_index_dump(fp, write_fname, ".gzi") < 0)
+                        error("Could not write index to '%s.gzi'\n", write_fname);
+                }
+            }
+
+            if (bgzf_close(fp) < 0)
+                error("Output close failed: Error %d\n", fp->errcode);
+        } else {
+            close(f_dst);
+        }
     }
+
+
     return ret;
 }
diff --git a/cram/cram_encode.c b/cram/cram_encode.c
index 9651abdec..4a762f7b0 100644
--- a/cram/cram_encode.c
+++ b/cram/cram_encode.c
@@ -2799,15 +2799,22 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b,
         // RG:Z
         if (aux[0] == 'R' && aux[1] == 'G' && aux[2] == 'Z') {
             char *rg = &aux[3];
+            aux = rg;
+            while (aux < aux_end && *aux++);
+            if (aux == aux_end && aux[-1] != '\0') {
+                hts_log_error("Unterminated RG:Z tag for read \"%s\"",
+                              bam_get_qname(b));
+                goto err;
+            }
             brg = sam_hrecs_find_rg(fd->header->hrecs, rg);
             if (brg) {
-                while (aux < aux_end && *aux++);
                 if (CRAM_MAJOR_VERS(fd->version) >= 4)
                     BLOCK_APPEND(td_b, "RG*", 3);
                 continue;
             } else {
                 // RG:Z tag will be stored verbatim
                 hts_log_warning("Missing @RG header for RG \"%s\"", rg);
+                aux = rg - 3;
             }
         }
 
@@ -2816,6 +2823,11 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b,
             if (cr->len && !no_ref && !(cr->flags & BAM_FUNMAP) && !verbatim_MD) {
                 if (MD && MD->s && strncasecmp(MD->s, aux+3, orig + aux_size - (aux+3)) == 0) {
                     while (aux < aux_end && *aux++);
+                    if (aux == aux_end && aux[-1] != '\0') {
+                        hts_log_error("Unterminated MD:Z tag for read \"%s\"",
+                                      bam_get_qname(b));
+                        goto err;
+                    }
                     if (CRAM_MAJOR_VERS(fd->version) >= 4)
                         BLOCK_APPEND(td_b, "MD*", 3);
                     continue;
@@ -3093,6 +3105,12 @@ static sam_hrec_rg_t *cram_encode_aux(cram_fd *fd, bam_seq_t *b,
             aux += 3;
             aux_s = aux;
             while (aux < aux_end && *aux++);
+            if (aux == aux_end && aux[-1] != '\0') {
+                hts_log_error("Unterminated %c%c:%c tag for read \"%s\"",
+                              aux_s[-3], aux_s[-2], aux_s[-1],
+                              bam_get_qname(b));
+                goto err;
+            }
             if (codec->encode(s, codec, aux_s, aux - aux_s) < 0)
                 goto err;
             break;
diff --git a/cram/cram_io.c b/cram/cram_io.c
index c3efb735f..247423354 100644
--- a/cram/cram_io.c
+++ b/cram/cram_io.c
@@ -1,5 +1,5 @@
 /*
-Copyright (c) 2012-2023 Genome Research Ltd.
+Copyright (c) 2012-2024 Genome Research Ltd.
 Author: James Bonfield <jkb@sanger.ac.uk>
 
 Redistribution and use in source and binary forms, with or without
@@ -3221,7 +3221,8 @@ void cram_ref_decr(refs_t *r, int id) {
  * Returns all or part of a reference sequence on success (malloced);
  *         NULL on failure.
  */
-static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) {
+static char *load_ref_portion(BGZF *fp, ref_entry *e,
+                              hts_pos_t start, hts_pos_t end) {
     off_t offset, len;
     char *seq;
 
@@ -3317,7 +3318,7 @@ static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) {
  */
 ref_entry *cram_ref_load(refs_t *r, int id, int is_md5) {
     ref_entry *e = r->ref_id[id];
-    int start = 1, end = e->length;
+    hts_pos_t start = 1, end = e->length;
     char *seq;
 
     if (e->seq) {
@@ -3401,7 +3402,7 @@ ref_entry *cram_ref_load(refs_t *r, int id, int is_md5) {
  * Returns reference on success,
  *         NULL on failure
  */
-char *cram_get_ref(cram_fd *fd, int id, int start, int end) {
+char *cram_get_ref(cram_fd *fd, int id, hts_pos_t start, hts_pos_t end) {
     ref_entry *r;
     char *seq;
     int ostart = start;
@@ -4928,7 +4929,7 @@ int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr) {
             if (!sam_hrecs_find_key(ty, "M5", NULL)) {
                 char unsigned buf[16];
                 char buf2[33];
-                int rlen;
+                hts_pos_t rlen;
                 hts_md5_context *md5;
 
                 if (!fd->refs ||
@@ -4956,7 +4957,19 @@ int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr) {
                 rlen = fd->refs->ref_id[i]->length; /* In case it just loaded */
                 if (!(md5 = hts_md5_init()))
                     return -1;
-                hts_md5_update(md5, ref, rlen);
+                if (HTS_POS_MAX <= ULONG_MAX) {
+                    // Platforms with 64-bit unsigned long update in one go
+                    hts_md5_update(md5, ref, rlen);
+                } else {
+                    // Those with 32-bit ulong (Windows) may have to loop
+                    // over epic references
+                    hts_pos_t pos = 0;
+                    while (rlen - pos > ULONG_MAX) {
+                        hts_md5_update(md5, ref + pos, ULONG_MAX);
+                        pos += ULONG_MAX;
+                    }
+                    hts_md5_update(md5, ref + pos, (unsigned long)(rlen - pos));
+                }
                 hts_md5_final(buf, md5);
                 hts_md5_destroy(md5);
                 cram_ref_decr(fd->refs, i);
diff --git a/cram/cram_io.h b/cram/cram_io.h
index 11279d492..d2d583df7 100644
--- a/cram/cram_io.h
+++ b/cram/cram_io.h
@@ -391,7 +391,7 @@ void refs_free(refs_t *r);
  * Returns reference on success;
  *         NULL on failure
  */
-char *cram_get_ref(cram_fd *fd, int id, int start, int end);
+char *cram_get_ref(cram_fd *fd, int id, hts_pos_t start, hts_pos_t end);
 void cram_ref_incr(refs_t *r, int id);
 void cram_ref_decr(refs_t *r, int id);
 /**@}*/
diff --git a/faidx.c b/faidx.c
index de5851446..ce8fe5d9f 100644
--- a/faidx.c
+++ b/faidx.c
@@ -190,7 +190,7 @@ static faidx_t *fai_build_core(BGZF *bgzf) {
                 kputsn("", 0, &name);
 
                 if (c < 0) {
-                    hts_log_error("The last entry '%s' has no sequence", name.s);
+                    hts_log_error("The last entry '%s' has no sequence at line %d", name.s, line_num);
                     goto fail;
                 }
 
@@ -247,7 +247,7 @@ static faidx_t *fai_build_core(BGZF *bgzf) {
                         state = SEQ_END;
 
                 } else if (line_len < ll) {
-                    hts_log_error("Different line length in sequence '%s'", name.s);
+                    hts_log_error("Different line length in sequence '%s' at line %d", name.s, line_num);
                     goto fail;
                 }
 
@@ -269,7 +269,7 @@ static faidx_t *fai_build_core(BGZF *bgzf) {
             case IN_QUAL:
                 if (c == '\n') {
                     if (!read_done) {
-                        hts_log_error("Inlined empty line is not allowed in quality of sequence '%s'", name.s);
+                        hts_log_error("Inlined empty line is not allowed in quality of sequence '%s' at line %d", name.s, line_num);
                         goto fail;
                     }
 
@@ -312,6 +312,7 @@ static faidx_t *fai_build_core(BGZF *bgzf) {
         if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0)
             goto fail;
     } else {
+        hts_log_error("File truncated at line %d", line_num);
         goto fail;
     }
 
diff --git a/hfile.c b/hfile.c
index 8a878b536..fc87049ca 100644
--- a/hfile.c
+++ b/hfile.c
@@ -1,6 +1,6 @@
 /*  hfile.c -- buffered low-level input/output streams.
 
-    Copyright (C) 2013-2021 Genome Research Ltd.
+    Copyright (C) 2013-2021, 2023-2024 Genome Research Ltd.
 
     Author: John Marshall <jm18@sanger.ac.uk>
 
@@ -884,11 +884,18 @@ char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
     return buf;
 }
 
+// open() stub for mem: which only works with the vopen() interface
+// Use 'data:,' for data encoded in the URL
+static hFILE *hopen_not_supported(const char *fname, const char *mode) {
+    errno = EINVAL;
+    return NULL;
+}
+
 int hfile_plugin_init_mem(struct hFILE_plugin *self)
 {
     // mem files are declared remote so they work with a tabix index
     static const struct hFILE_scheme_handler handler =
-            {NULL, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
+            {hopen_not_supported, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
     self->name = "mem";
     hfile_add_scheme_handler("mem", &handler);
     return 0;
@@ -923,7 +930,7 @@ static hFILE *crypt4gh_needed(const char *url, const char *mode)
 int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self)
 {
     static const struct hFILE_scheme_handler handler =
-        { crypt4gh_needed, NULL, "crypt4gh-needed", 0, NULL };
+        { crypt4gh_needed, hfile_always_local, "crypt4gh-needed", 0, NULL };
     self->name = "crypt4gh-needed";
     hfile_add_scheme_handler("crypt4gh", &handler);
     return 0;
@@ -1022,6 +1029,10 @@ void hfile_add_scheme_handler(const char *scheme,
                               const struct hFILE_scheme_handler *handler)
 {
     int absent;
+    if (handler->open == NULL || handler->isremote == NULL) {
+        hts_log_warning("Couldn't register scheme handler for %s: missing method", scheme);
+        return;
+    }
     if (!schemes) {
         if (try_exe_add_scheme_handler(scheme, handler) != 0) {
             hts_log_warning("Couldn't register scheme handler for %s", scheme);
diff --git a/hfile_s3.c b/hfile_s3.c
index e2718f656..c7c52e617 100644
--- a/hfile_s3.c
+++ b/hfile_s3.c
@@ -1,6 +1,6 @@
 /*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
 
-    Copyright (C) 2015-2017, 2019-2023 Genome Research Ltd.
+    Copyright (C) 2015-2017, 2019-2024 Genome Research Ltd.
 
     Author: John Marshall <jm18@sanger.ac.uk>
 
@@ -51,6 +51,7 @@ typedef struct s3_auth_data {
     kstring_t user_query_string;
     kstring_t host;
     kstring_t profile;
+    enum {s3_auto, s3_virtual, s3_path} url_style;
     time_t creds_expiry_time;
     char *bucket;
     kstring_t auth_hdr;
@@ -563,17 +564,32 @@ static int redirect_endpoint_callback(void *auth, long response,
             kputs(new_region, &ad->region);
 
             ad->host.l = 0;
-            ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region);
 
+            if (ad->url_style == s3_path) {
+                // Path style https://s3.{region-code}.amazonaws.com/{bucket-name}/{key-name}
+                ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region);
+            } else {
+                // Virtual https://{bucket-name}.s3.{region-code}.amazonaws.com/{key-name}
+                // Extract the {bucket-name} from {ad->host} to include in subdomain
+                kstring_t url_prefix = KS_INITIALIZE;
+                kputsn(ad->host.s, strcspn(ad->host.s, "."), &url_prefix);
+
+                ksprintf(&ad->host, "%s.s3.%s.amazonaws.com", url_prefix.s, new_region);
+                free(url_prefix.s);
+            }
             if (ad->region.l && ad->host.l) {
+               int e = 0;
                url->l = 0;
-               kputs(ad->host.s, url);
-               kputsn(ad->bucket, strlen(ad->bucket), url);
-               if (ad->user_query_string.l) {
-                   kputc('?', url);
-                   kputsn(ad->user_query_string.s, ad->user_query_string.l, url);
-               }
-               ret = 0;
+               e |= kputs("https://", url) < 0;
+               e |= kputs(ad->host.s, url) < 0;
+               e |= kputsn(ad->bucket, strlen(ad->bucket), url) < 0;
+
+               if (!e)
+                   ret = 0;
+            }
+            if (ad->user_query_string.l) {
+                kputc('?', url);
+                kputsn(ad->user_query_string.s, ad->user_query_string.l, url);
             }
         }
     }
@@ -591,11 +607,11 @@ static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
     ptrdiff_t bucket_len;
     int is_https = 1, dns_compliant;
     char *query_start;
-    enum {s3_auto, s3_virtual, s3_path} address_style = s3_auto;
 
     if (!ad)
         return NULL;
     ad->mode = strchr(mode, 'r') ? 'r' : 'w';
+    ad->url_style = s3_auto;
 
     // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
 
@@ -647,9 +663,9 @@ static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
 
         if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) {
             if (strcasecmp(v, "virtual") == 0) {
-                address_style = s3_virtual;
+                ad->url_style = s3_virtual;
             } else if (strcasecmp(v, "path") == 0) {
-                address_style = s3_path;
+                ad->url_style = s3_path;
             }
         }
     }
@@ -669,11 +685,11 @@ static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
 
         if (url_style.l) {
             if (strcmp(url_style.s, "virtual") == 0) {
-                address_style = s3_virtual;
+                ad->url_style = s3_virtual;
             } else if (strcmp(url_style.s, "path") == 0) {
-                address_style = s3_path;
+                ad->url_style = s3_path;
             } else {
-                address_style = s3_auto;
+                ad->url_style = s3_auto;
             }
         }
         if (expiry_time.l) {
@@ -703,9 +719,9 @@ static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
             // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string
             // indicates use of path style adressing.
             if (strstr(url_style.s, "%(bucket)s") == NULL) {
-                address_style = s3_path;
+                ad->url_style = s3_path;
             } else {
-                address_style = s3_auto;
+                ad->url_style = s3_auto;
             }
         }
 
@@ -717,9 +733,9 @@ static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
 
 
     // if address_style is set, force the dns_compliant setting
-    if (address_style == s3_virtual) {
+    if (ad->url_style == s3_virtual) {
         dns_compliant = 1;
-    } else if (address_style == s3_path) {
+    } else if (ad->url_style == s3_path) {
         dns_compliant = 0;
     } else {
         dns_compliant = is_dns_compliant(bucket, path, is_https);
@@ -872,7 +888,7 @@ static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *sig
     const unsigned char service[] = "s3";
     const unsigned char request[] = "aws4_request";
 
-    kstring_t secret_access_key = {0, 0, NULL};
+    kstring_t secret_access_key = KS_INITIALIZE;
     unsigned int len;
     unsigned int i, j;
 
@@ -899,11 +915,11 @@ static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *sig
 
 
 static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
-    kstring_t signed_headers = {0, 0, NULL};
-    kstring_t canonical_headers = {0, 0, NULL};
-    kstring_t canonical_request = {0, 0, NULL};
-    kstring_t scope = {0, 0, NULL};
-    kstring_t string_to_sign = {0, 0, NULL};
+    kstring_t signed_headers = KS_INITIALIZE;
+    kstring_t canonical_headers = KS_INITIALIZE;
+    kstring_t canonical_request = KS_INITIALIZE;
+    kstring_t scope = KS_INITIALIZE;
+    kstring_t string_to_sign = KS_INITIALIZE;
     char cr_hash[HASH_LENGTH_SHA256];
     char signature_string[HASH_LENGTH_SHA256];
     int ret = -1;
@@ -1024,7 +1040,7 @@ static int order_query_string(kstring_t *qs) {
     int *query_offset = NULL;
     int num_queries, i;
     char **queries = NULL;
-    kstring_t ordered = {0, 0, NULL};
+    kstring_t ordered = KS_INITIALIZE;
     char *escaped = NULL;
     int ret = -1;
 
@@ -1298,6 +1314,24 @@ static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
 
         if (fp == NULL) goto error;
 
+        if (http_response == 307) {
+            // Follow additional redirect.
+            ad->refcount = 1;
+            hclose_abruptly(fp);
+
+            url.l  = 0;
+            ksprintf(&url, "https://%s%s", ad->host.s, ad->bucket);
+
+            fp = hopen(url.s, mode, "va_list", argsp,
+                   "httphdr_callback", v4_auth_header_callback,
+                   "httphdr_callback_data", ad,
+                   "redirect_callback", redirect_endpoint_callback,
+                   "redirect_callback_data", ad,
+                   "http_response_ptr", &http_response,
+                   "fail_on_error", 0,
+                   NULL);
+        }
+
         if (http_response == 400) {
             ad->refcount = 1;
             if (handle_400_response(fp, ad) != 0) {
@@ -1318,7 +1352,7 @@ static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
 
         if (fp == NULL) goto error;
     } else {
-        kstring_t final_url = {0, 0, NULL};
+        kstring_t final_url = KS_INITIALIZE;
 
          // add the scheme marker
         ksprintf(&final_url, "s3w+%s", url.s);
diff --git a/hts.c b/hts.c
index b361e7cfb..cf0a07d9f 100644
--- a/hts.c
+++ b/hts.c
@@ -1600,6 +1600,8 @@ htsFile *hts_hopen(hFILE *hfile, const char *fn, const char *mode)
     return NULL;
 }
 
+static int hts_idx_close_otf_fp(hts_idx_t *idx);
+
 int hts_close(htsFile *fp)
 {
     int ret = 0, save;
@@ -1653,6 +1655,14 @@ int hts_close(htsFile *fp)
         break;
     }
 
+    if (fp->idx) {
+        // Close deferred index file handle, if present.
+        // Unfortunately this means errors on the index will get mixed with
+        // those on the main file, but as we only have the EOF block left to
+        // write it hopefully won't happen that often.
+        ret |= hts_idx_close_otf_fp(fp->idx);
+    }
+
     save = errno;
     sam_hdr_destroy(fp->bam_header);
     hts_idx_destroy(fp->idx);
@@ -2203,6 +2213,7 @@ struct hts_idx_t {
         uint64_t off_beg, off_end;
         uint64_t n_mapped, n_unmapped;
     } z; // keep internal states
+    BGZF *otf_fp;  // Index on-the-fly output file
 };
 
 static char * idx_format_name(int fmt) {
@@ -2329,6 +2340,7 @@ hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_l
     }
     idx->tbi_n = -1;
     idx->last_tbi_tid = -1;
+    idx->otf_fp = NULL;
     return idx;
 }
 
@@ -2644,6 +2656,17 @@ static inline void swap_bins(bins_t *p)
     }
 }
 
+static int need_idx_ugly_delay_hack(const hts_idx_t *idx)
+{
+    // Ugly hack for on-the-fly BAI indexes.  As these are uncompressed,
+    // we need to delay writing a few bytes of data until file close
+    // so that we have something to force a modification time update.
+    //
+    // (For compressed indexes like CSI, the BGZF EOF block serves the same
+    // purpose).
+    return idx->otf_fp && !idx->otf_fp->is_compressed;
+}
+
 static int idx_save_core(const hts_idx_t *idx, BGZF *fp, int fmt)
 {
     int32_t i, j;
@@ -2696,7 +2719,12 @@ static int idx_save_core(const hts_idx_t *idx, BGZF *fp, int fmt)
         }
     }
 
-    check(idx_write_uint64(fp, idx->n_no_coor));
+    if (!need_idx_ugly_delay_hack(idx)) {
+        // Write this for compressed (CSI) indexes, but for BAI we
+        // need to save a bit for later.  See hts_idx_close_otf_fp()
+        check(idx_write_uint64(fp, idx->n_no_coor));
+    }
+
 #ifdef DEBUG_INDEX
     idx_dump(idx);
 #endif
@@ -2727,16 +2755,9 @@ int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt)
     return ret;
 }
 
-int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt)
+static int hts_idx_write_out(const hts_idx_t *idx, BGZF *fp, int fmt)
 {
-    BGZF *fp;
-
-    #define check(ret) if ((ret) < 0) goto fail
-
-    if (fnidx == NULL) return hts_idx_save(idx, fn, fmt);
-
-    fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w");
-    if (fp == NULL) return -1;
+    #define check(ret) if ((ret) < 0) return -1
 
     if (fmt == HTS_FMT_CSI) {
         check(bgzf_write(fp, "CSI\1", 4));
@@ -2752,12 +2773,64 @@ int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int
 
     check(idx_save_core(idx, fp, fmt));
 
-    return bgzf_close(fp);
     #undef check
+    return 0;
+}
 
-fail:
-    bgzf_close(fp);
-    return -1;
+int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt)
+{
+    BGZF *fp;
+
+    if (fnidx == NULL)
+        return hts_idx_save(idx, fn, fmt);
+
+    fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w");
+    if (fp == NULL) return -1;
+
+    if (hts_idx_write_out(idx, fp, fmt) < 0) {
+        int save_errno = errno;
+        bgzf_close(fp);
+        errno = save_errno;
+        return -1;
+    }
+
+    return bgzf_close(fp);
+}
+
+// idx_save for on-the-fly indexes.  Mostly duplicated from above, except
+// idx is not const because we want to store the file handle in it, and
+// the index file handle is not closed.  This allows the index file to be
+// closed after the EOF block on the indexed file has been written out,
+// so the modification times on the two files will be in the correct order.
+int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt)
+{
+    idx->otf_fp = bgzf_open(fnidx, (fmt == HTS_FMT_BAI)? "wu" : "w");
+    if (idx->otf_fp == NULL) return -1;
+
+    if (hts_idx_write_out(idx, idx->otf_fp, fmt) < 0) {
+        int save_errno = errno;
+        bgzf_close(idx->otf_fp);
+        idx->otf_fp = NULL;
+        errno = save_errno;
+        return -1;
+    }
+
+    return bgzf_flush(idx->otf_fp);
+}
+
+static int hts_idx_close_otf_fp(hts_idx_t *idx)
+{
+    if (idx && idx->otf_fp) {
+        int ret = 0;
+        if (need_idx_ugly_delay_hack(idx)) {
+            // BAI index - write out the bytes we deferred earlier
+            ret = idx_write_uint64(idx->otf_fp, idx->n_no_coor) < 0;
+        }
+        ret |= bgzf_close(idx->otf_fp) < 0;
+        idx->otf_fp = NULL;
+        return ret == 0 ? 0 : -1;
+    }
+    return 0;
 }
 
 static int idx_read_core(hts_idx_t *idx, BGZF *fp, int fmt)
@@ -4566,9 +4639,19 @@ static int idx_test_and_fetch(const char *fn, const char **local_fn, int *local_
 }
 
 /*
- * Check the existence of a local index file using part of the alignment file name.
- * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai
+ * Check the existence of a local index file using part of the alignment
+ * file name.
+ *
+ * For a filename fn of fn.fmt (eg fn.bam or fn.cram) the order of checks is
+ * fn.fmt.csi,  fn.csi,
+ * fn.fmt.bai,  fn.bai  - if fmt is HTS_FMT_BAI
+ * fn.fmt.tbi,  fn.tbi  - if fmt is HTS_FMT_TBI
+ * fn.fmt.crai, fn.crai - if fmt is HTS_FMT_CRAI
+ * fn.fmt.fai           - if fmt is HTS_FMT_FAI
+ *   also .gzi if fmt is ".gz"
+ *
  * @param fn    - pointer to the file name
+ * @param fmt   - one of the HTS_FMT index formats
  * @param fnidx - pointer to the index file name placeholder
  * @return        1 for success, 0 for failure
  */
@@ -4576,11 +4659,12 @@ int hts_idx_check_local(const char *fn, int fmt, char **fnidx) {
     int i, l_fn, l_ext;
     const char *fn_tmp = NULL;
     char *fnidx_tmp;
-    char *csi_ext = ".csi";
-    char *bai_ext = ".bai";
-    char *tbi_ext = ".tbi";
-    char *crai_ext = ".crai";
-    char *fai_ext = ".fai";
+    const char *csi_ext = ".csi";
+    const char *bai_ext = ".bai";
+    const char *tbi_ext = ".tbi";
+    const char *crai_ext = ".crai";
+    const char *fai_ext = ".fai";
+    const char *gzi_ext = ".gzi";
 
     if (!fn)
         return 0;
@@ -4677,10 +4761,21 @@ int hts_idx_check_local(const char *fn, int fmt, char **fnidx) {
                 }
         }
     } else if (fmt == HTS_FMT_FAI) { // Or .fai
-        strcpy(fnidx_tmp, fn_tmp); strcpy(fnidx_tmp + l_fn, fai_ext);
+        // Check .gzi if we have a .gz file
+        strcpy(fnidx_tmp, fn_tmp);
+        int gzi_ok = 1;
+        if ((l_fn > 3 && strcmp(fn_tmp+l_fn-3, ".gz") == 0) ||
+            (l_fn > 5 && strcmp(fn_tmp+l_fn-5, ".bgzf") == 0)) {
+            strcpy(fnidx_tmp + l_fn, gzi_ext);
+            gzi_ok = stat(fnidx_tmp, &sbuf)==0;
+        }
+
+        // Now check for .fai.  Occurs second as we're returning this
+        // in *fnidx irrespective of whether we did gzi check.
+        strcpy(fnidx_tmp + l_fn, fai_ext);
         *fnidx = fnidx_tmp;
-        if(stat(fnidx_tmp, &sbuf) == 0)
-            return 1;
+        if (stat(fnidx_tmp, &sbuf) == 0)
+            return gzi_ok;
         else
             return 0;
     }
diff --git a/hts_internal.h b/hts_internal.h
index 61956da21..191a55d16 100644
--- a/hts_internal.h
+++ b/hts_internal.h
@@ -67,6 +67,11 @@ void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset);
 
 int hts_idx_fmt(hts_idx_t *idx);
 
+// Internal interface to save on-the-fly indexes.  The index file handle
+// is kept open so hts_close() can close if after writing out the EOF
+// block for its own file.
+int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt);
+
 // Construct a unique filename based on fname and open it.
 struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname);
 
diff --git a/htsfile.1 b/htsfile.1
index bb7caf5a3..89a2fe446 100644
--- a/htsfile.1
+++ b/htsfile.1
@@ -1,4 +1,4 @@
-.TH htsfile 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools"
+.TH htsfile 1 "15 April 2024" "htslib-1.20" "Bioinformatics tools"
 .SH NAME
 htsfile \- identify high-throughput sequencing data files
 .\"
diff --git a/htslib-s3-plugin.7 b/htslib-s3-plugin.7
index ffbcd9c89..3bd868c71 100644
--- a/htslib-s3-plugin.7
+++ b/htslib-s3-plugin.7
@@ -1,4 +1,4 @@
-.TH htslib-s3-plugin 7 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools"
+.TH htslib-s3-plugin 7 "15 April 2024" "htslib-1.20" "Bioinformatics tools"
 .SH NAME
 htslib-s3-plugin \- htslib AWS S3 plugin
 .\"
diff --git a/htslib.map b/htslib.map
index 9542861bd..e342f55b5 100644
--- a/htslib.map
+++ b/htslib.map
@@ -636,3 +636,7 @@ HTSLIB_1.18 {
     bam_parse_basemod2;
     fai_thread_pool;
 } HTSLIB_1.17;
+
+HTSLIB_1.20 {
+    tbx_conf_gaf;
+} HTSLIB_1.18;
diff --git a/htslib/hts.h b/htslib/hts.h
index ef393306a..c5d99aba1 100644
--- a/htslib/hts.h
+++ b/htslib/hts.h
@@ -489,7 +489,7 @@ const char *hts_version(void);
 // Immediately after release, bump ZZ to 90 to distinguish in-development
 // Git repository builds from the release; you may wish to increment this
 // further when significant features are merged.
-#define HTS_VERSION 101901
+#define HTS_VERSION 102000
 
 /*! @abstract Introspection on the features enabled in htslib
  *
diff --git a/htslib/hts_defs.h b/htslib/hts_defs.h
index 357684012..e714e8fda 100644
--- a/htslib/hts_defs.h
+++ b/htslib/hts_defs.h
@@ -67,6 +67,12 @@ DEALINGS IN THE SOFTWARE.  */
 #define HTS_OPT3
 #endif
 
+#if HTS_COMPILER_HAS(aligned) || HTS_GCC_AT_LEAST(4,3)
+#define HTS_ALIGN32 __attribute__((aligned(32)))
+#else
+#define HTS_ALIGN32
+#endif
+
 // GCC introduced warn_unused_result in 3.4 but added -Wno-unused-result later
 #if HTS_COMPILER_HAS(__warn_unused_result__) || HTS_GCC_AT_LEAST(4,5)
 #define HTS_RESULT_USED __attribute__ ((__warn_unused_result__))
diff --git a/htslib/tbx.h b/htslib/tbx.h
index 3d2037cbb..f4b5bd856 100644
--- a/htslib/tbx.h
+++ b/htslib/tbx.h
@@ -38,6 +38,7 @@ extern "C" {
 #define TBX_GENERIC 0
 #define TBX_SAM     1
 #define TBX_VCF     2
+#define TBX_GAF     3
 #define TBX_UCSC    0x10000
 
 typedef struct tbx_conf_t {
@@ -53,7 +54,7 @@ typedef struct tbx_t {
 } tbx_t;
 
 HTSLIB_EXPORT
-extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sam, tbx_conf_vcf;
+extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sam, tbx_conf_vcf, tbx_conf_gaf;
 
     #define tbx_itr_destroy(iter) hts_itr_destroy(iter)
     #define tbx_itr_queryi(tbx, tid, beg, end) hts_itr_query((tbx)->idx, (tid), (beg), (end), tbx_readrec)
diff --git a/kstring.c b/kstring.c
index 958d2ef04..9a6142e80 100644
--- a/kstring.c
+++ b/kstring.c
@@ -1,7 +1,7 @@
 /* The MIT License
 
    Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
-   Copyright (C) 2013-2018, 2020-2021 Genome Research Ltd.
+   Copyright (C) 2013-2018, 2020-2021, 2023 Genome Research Ltd.
 
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
diff --git a/sam.c b/sam.c
index 82b3b9340..1a5519410 100644
--- a/sam.c
+++ b/sam.c
@@ -1,6 +1,6 @@
 /*  sam.c -- SAM and BAM file I/O and manipulation.
 
-    Copyright (C) 2008-2010, 2012-2023 Genome Research Ltd.
+    Copyright (C) 2008-2010, 2012-2024 Genome Research Ltd.
     Copyright (C) 2010, 2012, 2013 Broad Institute.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -708,7 +708,7 @@ static int bam_tag2cigar(bam1_t *b, int recal_bin, int give_warning) // return 0
     if (recal_bin)
         b->core.bin = hts_reg2bin(b->core.pos, bam_endpos(b), 14, 5);
     if (give_warning)
-        hts_log_error("%s encodes a CIGAR with %d operators at the CG tag", bam_get_qname(b), c->n_cigar);
+        hts_log_warning("%s encodes a CIGAR with %d operators at the CG tag", bam_get_qname(b), c->n_cigar);
     return 1;
 }
 
@@ -1099,7 +1099,7 @@ int sam_idx_save(htsFile *fp) {
         if (hts_idx_finish(fp->idx, bgzf_tell(fp->fp.bgzf)) < 0)
             return -1;
 
-        return hts_idx_save_as(fp->idx, NULL, fp->fnidx, hts_idx_fmt(fp->idx));
+        return hts_idx_save_but_not_close(fp->idx, fp->fnidx, hts_idx_fmt(fp->idx));
 
     } else if (fp->format.format == cram) {
         // flushed and closed by cram_close
@@ -2383,9 +2383,175 @@ int sam_hdr_change_HD(sam_hdr_t *h, const char *key, const char *val)
  *** SAM record I/O ***
  **********************/
 
-static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end,
-                            char *r, bam1_t *b)
-{
+// The speed of this code can vary considerably depending on minor code
+// changes elsewhere as some of the tight loops are particularly prone to
+// speed changes when the instruction blocks are split over a 32-byte
+// boundary.  To protect against this, we explicitly specify an alignment
+// for this function.  If this is insufficient, we may also wish to
+// consider alignment of blocks within this function via
+// __attribute__((optimize("align-loops=5"))) (gcc) or clang equivalents.
+// However it's not very portable.
+// Instead we break into separate functions so we can explicitly specify
+// use __attribute__((aligned(32))) instead and force consistent loop
+// alignment.
+static inline int64_t grow_B_array(bam1_t *b, uint32_t *n, size_t size) {
+    // Avoid overflow on 32-bit platforms, but it breaks BAM anyway
+    if (*n > INT32_MAX*0.666) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    size_t bytes = (size_t)size * (size_t)(*n>>1);
+    if (possibly_expand_bam_data(b, bytes) < 0) {
+        hts_log_error("Out of memory");
+        return -1;
+    }
+
+    (*n)+=*n>>1;
+    return 0;
+}
+
+
+// This ensures that q always ends up at the next comma after
+// reading a number even if it's followed by junk.  It
+// prevents the possibility of trying to read more than n items.
+#define skip_to_comma_(q) do { while (*(q) > '\t' && *(q) != ',') (q)++; } while (0)
+
+HTS_ALIGN32
+static char *sam_parse_Bc_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 1) < 0)
+                return NULL;
+        }
+        *(b->data + b->l_data) = hts_str2int(q + 1, &q, 8, overflow);
+        b->l_data++;
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static char *sam_parse_BC_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 1) < 0)
+                return NULL;
+        }
+        if (q[1] != '-') {
+            *(b->data + b->l_data) = hts_str2uint(q + 1, &q, 8, overflow);
+            b->l_data++;
+        } else {
+            *overflow = 1;
+            q++;
+            skip_to_comma_(q);
+        }
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static char *sam_parse_Bs_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 2) < 0)
+                return NULL;
+        }
+        i16_to_le(hts_str2int(q + 1, &q, 16, overflow),
+                  b->data + b->l_data);
+        b->l_data += 2;
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static char *sam_parse_BS_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 2) < 0)
+                return NULL;
+        }
+        if (q[1] != '-') {
+            u16_to_le(hts_str2uint(q + 1, &q, 16, overflow),
+                      b->data + b->l_data);
+            b->l_data += 2;
+        } else {
+            *overflow = 1;
+            q++;
+            skip_to_comma_(q);
+        }
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static char *sam_parse_Bi_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 4) < 0)
+                return NULL;
+        }
+        i32_to_le(hts_str2int(q + 1, &q, 32, overflow),
+                  b->data + b->l_data);
+        b->l_data += 4;
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static char *sam_parse_BI_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 4) < 0)
+                return NULL;
+        }
+        if (q[1] != '-') {
+            u32_to_le(hts_str2uint(q + 1, &q, 32, overflow),
+                      b->data + b->l_data);
+            b->l_data += 4;
+        } else {
+            *overflow = 1;
+            q++;
+            skip_to_comma_(q);
+        }
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static char *sam_parse_Bf_vals(bam1_t *b, char *q, uint32_t *nused,
+                               uint32_t *nalloc, int *overflow) {
+    while (*q == ',') {
+        if ((*nused)++ >= (*nalloc)) {
+            if (grow_B_array(b, nalloc, 4) < 0)
+                return NULL;
+        }
+        float_to_le(strtod(q + 1, &q), b->data + b->l_data);
+        b->l_data += 4;
+    }
+    return q;
+}
+
+HTS_ALIGN32
+static int sam_parse_B_vals_r(char type, uint32_t nalloc, char *in,
+                              char **end, bam1_t *b,
+                              int *ctr) {
+    // Protect against infinite recursion when dealing with invalid input.
+    // An example string is "XX:B:C,-".  The lack of a number means min=0,
+    // but it overflowed due to "-" and so we repeat ad-infinitum.
+    //
+    // Loop detection is the safest solution incase there are other
+    // strange corner cases with malformed inputs.
+    if (++(*ctr) > 2) {
+        hts_log_error("Malformed data in B:%c array", type);
+        return -1;
+    }
+
     int orig_l = b->l_data;
     char *q = in;
     int32_t size;
@@ -2398,80 +2564,60 @@ static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end,
         return -1;
     }
 
-    // Ensure space for type + values
-    bytes = (size_t) n * (size_t) size;
-    if (bytes / size != n
+    // Ensure space for type + values.
+    // The first pass through here we don't know the number of entries and
+    // nalloc == 0.  We start with a small working set and then parse the
+    // data, growing as needed.
+    //
+    // If we have a second pass through we do know the number of entries
+    // and nalloc is already known.  We have no need to expand the bam data.
+    if (!nalloc)
+         nalloc=7;
+
+    // Ensure allocated memory is big enough (for current nalloc estimate)
+    bytes = (size_t) nalloc * (size_t) size;
+    if (bytes / size != nalloc
         || possibly_expand_bam_data(b, bytes + 2 + sizeof(uint32_t))) {
         hts_log_error("Out of memory");
         return -1;
     }
 
+    uint32_t nused = 0;
+
     b->data[b->l_data++] = 'B';
     b->data[b->l_data++] = type;
-    i32_to_le(n, b->data + b->l_data);
+    // 32-bit B-array length is inserted later once we know it.
+    int b_len_idx = b->l_data;
     b->l_data += sizeof(uint32_t);
-    // This ensures that q always ends up at the next comma after
-    // reading a number even if it's followed by junk.  It
-    // prevents the possibility of trying to read more than n items.
-#define skip_to_comma_(q) do { while (*(q) > '\t' && *(q) != ',') (q)++; } while (0)
+
     if (type == 'c') {
-        while (q < r) {
-            *(b->data + b->l_data) = hts_str2int(q + 1, &q, 8, &overflow);
-            b->l_data++;
-            skip_to_comma_(q);
-        }
+        if (!(q = sam_parse_Bc_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
     } else if (type == 'C') {
-        while (q < r) {
-            if (*q != '-') {
-                *(b->data + b->l_data) = hts_str2uint(q + 1, &q, 8, &overflow);
-                b->l_data++;
-            } else {
-                overflow = 1;
-            }
-            skip_to_comma_(q);
-        }
+        if (!(q = sam_parse_BC_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
     } else if (type == 's') {
-        while (q < r) {
-            i16_to_le(hts_str2int(q + 1, &q, 16, &overflow), b->data + b->l_data);
-            b->l_data += 2;
-            skip_to_comma_(q);
-        }
+        if (!(q = sam_parse_Bs_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
     } else if (type == 'S') {
-        while (q < r) {
-            if (*q != '-') {
-                u16_to_le(hts_str2uint(q + 1, &q, 16, &overflow), b->data + b->l_data);
-                b->l_data += 2;
-            } else {
-                overflow = 1;
-            }
-            skip_to_comma_(q);
-        }
+        if (!(q = sam_parse_BS_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
     } else if (type == 'i') {
-        while (q < r) {
-            i32_to_le(hts_str2int(q + 1, &q, 32, &overflow), b->data + b->l_data);
-            b->l_data += 4;
-            skip_to_comma_(q);
-        }
+        if (!(q = sam_parse_Bi_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
     } else if (type == 'I') {
-        while (q < r) {
-            if (*q != '-') {
-                u32_to_le(hts_str2uint(q + 1, &q, 32, &overflow), b->data + b->l_data);
-                b->l_data += 4;
-            } else {
-                overflow = 1;
-            }
-            skip_to_comma_(q);
-        }
+        if (!(q = sam_parse_BI_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
     } else if (type == 'f') {
-        while (q < r) {
-            float_to_le(strtod(q + 1, &q), b->data + b->l_data);
-            b->l_data += 4;
-            skip_to_comma_(q);
-        }
-    } else {
-        hts_log_error("Unrecognized type B:%c", type);
+        if (!(q = sam_parse_Bf_vals(b, q, &nused, &nalloc, &overflow)))
+            return -1;
+    }
+    if (*q != '\t' && *q != '\0') {
+        // Unknown B array type or junk in the numbers
+        hts_log_error("Malformed B:%c", type);
         return -1;
     }
+    i32_to_le(nused, b->data + b_len_idx);
 
     if (!overflow) {
         *end = q;
@@ -2479,6 +2625,7 @@ static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end,
     } else {
         int64_t max = 0, min = 0, val;
         // Given type was incorrect.  Try to rescue the situation.
+        char *r = q;
         q = in;
         overflow = 0;
         b->l_data = orig_l;
@@ -2493,19 +2640,19 @@ static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end,
         if (!overflow) {
             if (min < 0) {
                 if (min >= INT8_MIN && max <= INT8_MAX) {
-                    return sam_parse_B_vals('c', n, in, end, r, b);
+                    return sam_parse_B_vals_r('c', nalloc, in, end, b, ctr);
                 } else if (min >= INT16_MIN && max <= INT16_MAX) {
-                    return sam_parse_B_vals('s', n, in, end, r, b);
+                    return sam_parse_B_vals_r('s', nalloc, in, end, b, ctr);
                 } else if (min >= INT32_MIN && max <= INT32_MAX) {
-                    return sam_parse_B_vals('i', n, in, end, r, b);
+                    return sam_parse_B_vals_r('i', nalloc, in, end, b, ctr);
                 }
             } else {
                 if (max < UINT8_MAX) {
-                    return sam_parse_B_vals('C', n, in, end, r, b);
+                    return sam_parse_B_vals_r('C', nalloc, in, end, b, ctr);
                 } else if (max <= UINT16_MAX) {
-                    return sam_parse_B_vals('S', n, in, end, r, b);
+                    return sam_parse_B_vals_r('S', nalloc, in, end, b, ctr);
                 } else if (max <= UINT32_MAX) {
-                    return sam_parse_B_vals('I', n, in, end, r, b);
+                    return sam_parse_B_vals_r('I', nalloc, in, end, b, ctr);
                 }
             }
         }
@@ -2516,6 +2663,14 @@ static int sam_parse_B_vals(char type, uint32_t n, char *in, char **end,
 #undef skip_to_comma_
 }
 
+HTS_ALIGN32
+static int sam_parse_B_vals(char type, char *in, char **end, bam1_t *b)
+{
+    int ctr = 0;
+    uint32_t nalloc = 0;
+    return sam_parse_B_vals_r(type, nalloc, in, end, b, &ctr);
+}
+
 static inline unsigned int parse_sam_flag(char *v, char **rv, int *overflow) {
     if (*v >= '1' && *v <= '9') {
         return hts_str2uint(v, rv, 16, overflow);
@@ -2659,16 +2814,11 @@ static inline int aux_parse(char *start, char *end, bam1_t *b, int lenient,
             b->data[b->l_data++] = '\0';
             q = end;
         } else if (type == 'B') {
-            uint32_t n;
-            char *r;
             type = *q++; // q points to the first ',' following the typing byte
             _parse_err(*q && *q != ',' && *q != '\t',
                        "B aux field type not followed by ','");
 
-            for (r = q, n = 0; *r > '\t'; ++r)
-                if (*r == ',') ++n;
-
-            if (sam_parse_B_vals(type, n, q, &q, r, b) < 0)
+            if (sam_parse_B_vals(type, q, &q, b) < 0)
                 goto err_ret;
         } else _parse_err(1, "unrecognized type %s", hts_strprint(logbuf, sizeof logbuf, '\'', &type, 1));
 
@@ -4298,8 +4448,8 @@ static int sam_format1_append(const bam_hdr_t *h, const bam1_t *b, kstring_t *st
     return str->l;
 
  bad_aux:
-    hts_log_error("Corrupted aux data for read %.*s",
-                  b->core.l_qname, bam_get_qname(b));
+    hts_log_error("Corrupted aux data for read %.*s flag %d",
+                  b->core.l_qname, bam_get_qname(b), b->core.flag);
     errno = EINVAL;
     return -1;
 
@@ -4706,7 +4856,8 @@ uint8_t *bam_aux_next(const bam1_t *b, const uint8_t *s)
     return next+2;
 
  bad_aux:
-    hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
+    hts_log_error("Corrupted aux data for read %s flag %d",
+                  bam_get_qname(b), b->core.flag);
     errno = EINVAL;
     return NULL;
 }
@@ -4728,7 +4879,8 @@ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
     return NULL;
 
  bad_aux:
-    hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
+    hts_log_error("Corrupted aux data for read %s flag %d",
+                  bam_get_qname(b), b->core.flag);
     errno = EINVAL;
     return NULL;
 }
@@ -4752,7 +4904,8 @@ uint8_t *bam_aux_remove(bam1_t *b, uint8_t *s)
     return s;
 
  bad_aux:
-    hts_log_error("Corrupted aux data for read %s", bam_get_qname(b));
+    hts_log_error("Corrupted aux data for read %s flag %d",
+                  bam_get_qname(b), b->core.flag);
     errno = EINVAL;
     return NULL;
 }
@@ -5557,6 +5710,8 @@ void bam_plp_destroy(bam_plp_t iter)
     lbnode_t *p, *pnext;
     if ( iter->overlaps ) kh_destroy(olap_hash, iter->overlaps);
     for (p = iter->head; p != NULL; p = pnext) {
+        if (iter->plp_destruct && p != iter->tail)
+            iter->plp_destruct(iter->data, &p->b, &p->cd);
         pnext = p->next;
         mp_free(iter->mp, p);
     }
@@ -5706,8 +5861,7 @@ static int tweak_overlap_quality(bam1_t *a, bam1_t *b)
     // Loop over the overlapping region nulling qualities in either
     // seq a or b.
     int err = 0;
-    while ( 1 )
-    {
+    while ( 1 ) {
         // Step to next matching reference position in a and b
         while ( a_ret >= 0 && a_iref>=0 && a_iref < iref - a->core.pos )
             a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max,
@@ -5716,8 +5870,6 @@ static int tweak_overlap_quality(bam1_t *a, bam1_t *b)
             err = a_ret<-1?-1:0;
             break;
         }
-        if ( iref < a_iref + a->core.pos )
-            iref = a_iref + a->core.pos;
 
         while ( b_ret >= 0 && b_iref>=0 && b_iref < iref - b->core.pos )
             b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max, &b_icig,
@@ -5726,14 +5878,55 @@ static int tweak_overlap_quality(bam1_t *a, bam1_t *b)
             err = b_ret<-1?-1:0;
             break;
         }
+
+        if ( iref < a_iref + a->core.pos )
+            iref = a_iref + a->core.pos;
+
         if ( iref < b_iref + b->core.pos )
             iref = b_iref + b->core.pos;
 
         iref++;
 
-        if ( a_iref+a->core.pos != b_iref+b->core.pos )
-            // only CMATCH positions, don't know what to do with indels
-            continue;
+        // If A or B has a deletion then we catch up the other to this point.
+        // We also amend quality values using the same rules for mismatch.
+        if (a_iref+a->core.pos != b_iref+b->core.pos) {
+            if (a_iref+a->core.pos < b_iref+b->core.pos
+                && b_cigar > bam_get_cigar(b)
+                && bam_cigar_op(b_cigar[-1]) == BAM_CDEL) {
+                // Del in B means it's moved on further than A
+                do {
+                    a_qual[a_iseq] = amul
+                        ? a_qual[a_iseq]*0.8
+                        : 0;
+                    a_ret = cigar_iref2iseq_next(&a_cigar, a_cigar_max,
+                                                 &a_icig, &a_iseq, &a_iref);
+                    if (a_ret < 0)
+                        return -(a_ret<-1); // 0 or -1
+                } while (a_iref + a->core.pos < b_iref+b->core.pos);
+            } else if (a_cigar > bam_get_cigar(a)
+                       && bam_cigar_op(a_cigar[-1]) == BAM_CDEL) {
+                // Del in A means it's moved on further than B
+                do {
+                    b_qual[b_iseq] = bmul
+                        ? b_qual[b_iseq]*0.8
+                        : 0;
+                    b_ret = cigar_iref2iseq_next(&b_cigar, b_cigar_max,
+                                                 &b_icig, &b_iseq, &b_iref);
+                    if (b_ret < 0)
+                        return -(b_ret<-1); // 0 or -1
+                } while (b_iref + b->core.pos < a_iref+a->core.pos);
+            } else {
+                // Anything else, eg ref-skip, we don't support here
+                continue;
+            }
+        }
+
+        // fprintf(stderr, "a_cig=%ld,%ld b_cig=%ld,%ld iref=%ld "
+        //         "a_iref=%ld b_iref=%ld a_iseq=%ld b_iseq=%ld\n",
+        //         a_cigar-bam_get_cigar(a), a_icig,
+        //         b_cigar-bam_get_cigar(b), b_icig,
+        //         iref, a_iref+a->core.pos+1, b_iref+b->core.pos+1,
+        //         a_iseq, b_iseq);
 
         if (a_iseq > a->core.l_qseq || b_iseq > b->core.l_qseq)
             // Fell off end of sequence, bad CIGAR?
diff --git a/sam_mods.c b/sam_mods.c
index fe8db85f7..e45f26d91 100644
--- a/sam_mods.c
+++ b/sam_mods.c
@@ -1,6 +1,6 @@
 /*  sam_mods.c -- Base modification handling in SAM and BAM.
 
-    Copyright (C) 2020-2023 Genome Research Ltd.
+    Copyright (C) 2020-2024 Genome Research Ltd.
 
     Author: James Bonfield <jkb@sanger.ac.uk>
 
@@ -24,6 +24,7 @@ DEALINGS IN THE SOFTWARE.  */
 
 #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
 #include <config.h>
+#include <assert.h>
 
 #include "htslib/sam.h"
 #include "textutils_internal.h"
@@ -245,7 +246,7 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state,
     }
 
     uint8_t *mi = bam_aux_get(b, "MN");
-    if (mi && bam_aux2i(mi) != b->core.l_qseq) {
+    if (mi && bam_aux2i(mi) != b->core.l_qseq && b->core.l_qseq) {
         // bam_aux2i with set errno = EINVAL and return 0 if the tag
         // isn't integer, but 0 will be a seq-length mismatch anyway so
         // triggers an error here too.
@@ -359,7 +360,7 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state,
                 if (!cp_end) {
                     // empty list
                     delta = INT_MAX;
-                    cp_end = cp+1;
+                    cp_end = cp;
                 }
             }
             // Now delta is first in list or computed remainder,
@@ -378,7 +379,7 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state,
                 }
                 state->MMcount  [mod_num] = delta;
                 if (b->core.flag & BAM_FREVERSE) {
-                    state->MM   [mod_num] = cp+1;
+                    state->MM   [mod_num] = me+1;
                     state->MMend[mod_num] = cp_end;
                     state->ML   [mod_num] = ml ? ml+n +(ndelta-1)*stride: NULL;
                 } else {
@@ -426,6 +427,10 @@ int bam_parse_basemod2(const bam1_t *b, hts_base_mod_state *state,
             }
         }
     }
+    if (ml && ml != ml_end) {
+        hts_log_error("%s: Too many entries in ML tag", bam_get_qname(b));
+        return -1;
+    }
 
     state->nmods = mod_num;
 
@@ -496,6 +501,11 @@ int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state,
         if (b->core.flag & BAM_FREVERSE) {
             // process MM list backwards
             char *cp;
+            if (state->MMend[i]-1 < state->MM[i]) {
+                // Should be impossible to hit if coding is correct
+                hts_log_error("Assert failed while processing base modification states");
+                return -1;
+            }
             for (cp = state->MMend[i]-1; cp != state->MM[i]; cp--)
                 if (*cp == ',')
                     break;
@@ -544,9 +554,6 @@ int bam_mods_at_next_pos(const bam1_t *b, hts_base_mod_state *state,
  */
 int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state,
                      hts_base_mod *mods, int n_mods, int *pos) {
-    if (state->seq_pos >= b->core.l_qseq)
-        return 0;
-
     // Look through state->MMcount arrays to see when the next lowest is
     // per base type;
     int next[16], freq[16] = {0}, i;
@@ -579,18 +586,6 @@ int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state,
     }
     *pos = state->seq_pos = i;
 
-    if (i >= b->core.l_qseq) {
-        // Check for more MM elements than bases present.
-        for (i = 0; i < state->nmods; i++) {
-            if (!(b->core.flag & BAM_FREVERSE) &&
-                state->MMcount[i] < 0x7f000000) {
-                hts_log_warning("MM tag refers to bases beyond sequence length");
-                return -1;
-            }
-        }
-        return 0;
-    }
-
     if (b->core.flag & BAM_FREVERSE) {
         for (i = 0; i < state->nmods; i++)
             state->MMcount[i] -= freq[seqi_rc[state->canonical[i]]];
@@ -599,6 +594,23 @@ int bam_next_basemod(const bam1_t *b, hts_base_mod_state *state,
             state->MMcount[i] -= freq[state->canonical[i]];
     }
 
+    if (b->core.l_qseq && state->seq_pos >= b->core.l_qseq &&
+        !(b->core.flag & BAM_FREVERSE)) {
+        // Spots +ve orientation run-overs.
+        // The -ve orientation is spotted in bam_parse_basemod2
+        int i;
+        for (i = 0; i < state->nmods; i++) {
+            // Check if any remaining items in MM after hitting the end
+            // of the sequence.
+            if (state->MMcount[i] < 0x7f000000 ||
+                (*state->MM[i]!=0 && *state->MM[i]!=';')) {
+                hts_log_warning("MM tag refers to bases beyond sequence length");
+                return -1;
+            }
+        }
+        return 0;
+    }
+
     int r = bam_mods_at_next_pos(b, state, mods, n_mods);
     return r > 0 ? r : 0;
 }
diff --git a/tabix.1 b/tabix.1
index 0d852a9ea..9bf1d6891 100644
--- a/tabix.1
+++ b/tabix.1
@@ -1,10 +1,10 @@
-.TH tabix 1 "22 January 2024" "htslib-1.19.1" "Bioinformatics tools"
+.TH tabix 1 "15 April 2024" "htslib-1.20" "Bioinformatics tools"
 .SH NAME
 .PP
 tabix \- Generic indexer for TAB-delimited genome position files
 .\"
 .\" Copyright (C) 2009-2011 Broad Institute.
-.\" Copyright (C) 2014, 2016, 2018, 2020, 2022 Genome Research Ltd.
+.\" Copyright (C) 2014, 2016, 2018, 2020, 2022, 2024 Genome Research Ltd.
 .\"
 .\" Author: Heng Li <lh3@sanger.ac.uk>
 .\"
@@ -78,6 +78,8 @@ and BAI index formats can handle individual chromosomes up to 512 Mbp
 If your input file might contain data lines with begin or end positions
 greater than that, you will need to use a CSI index.
 
+Multiple threads can be used for operations except listing of sequence names.
+
 .SH INDEXING OPTIONS
 .TP 10
 .B -0, --zero-based
@@ -167,6 +169,10 @@ The default is 3, which turns on error and warning messages;
 2 reduces warning messages;
 1 prints only error messages and 0 is mostly silent.
 Values higher than 3 produce additional informational and debugging messages.
+.TP
+.BI "-@, --threads " INT
+Set number of threads to use for the operation.
+The default is 0, where no extra threads are in use.
 .PP
 .SH EXAMPLE
 (grep "^#" in.gff; grep -v "^#" in.gff | sort -t"`printf '\(rst'`" -k1,1 -k4,4n) | bgzip > sorted.gff.gz;
diff --git a/tabix.c b/tabix.c
index e20c0feba..2fb5d4bc4 100644
--- a/tabix.c
+++ b/tabix.c
@@ -1,7 +1,7 @@
 /*  tabix.c -- Generic indexer for TAB-delimited genome position files.
 
     Copyright (C) 2009-2011 Broad Institute.
-    Copyright (C) 2010-2012, 2014-2020 Genome Research Ltd.
+    Copyright (C) 2010-2012, 2014-2020, 2024 Genome Research Ltd.
 
     Author: Heng Li <lh3@sanger.ac.uk>
 
@@ -44,11 +44,16 @@ DEALINGS IN THE SOFTWARE.  */
 #include "htslib/regidx.h"
 #include "htslib/hts_defs.h"
 #include "htslib/hts_log.h"
+#include "htslib/thread_pool.h"
+
+//for easy coding
+#define RELEASE_TPOOL(X) { hts_tpool *ptr = (hts_tpool*)(X); if (ptr) { hts_tpool_destroy(ptr); } }
+#define bam_index_build3(fn, min_shift, nthreads) (sam_index_build3((fn), NULL, (min_shift), (nthreads)))
 
 typedef struct
 {
     char *regions_fname, *targets_fname;
-    int print_header, header_only, cache_megs, download_index, separate_regs;
+    int print_header, header_only, cache_megs, download_index, separate_regs, threads;
 }
 args_t;
 
@@ -92,6 +97,7 @@ error_errno(const char *format, ...)
 #define IS_BCF  (1<<4)
 #define IS_BAM  (1<<5)
 #define IS_CRAM (1<<6)
+#define IS_GAF  (1<<7)
 #define IS_TXT  (IS_GFF|IS_BED|IS_SAM|IS_VCF)
 
 int file_type(const char *fname)
@@ -104,6 +110,7 @@ int file_type(const char *fname)
     else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF;
     else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM;
     else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM;
+    else if (l>=7 && strcasecmp(fname+l-7, ".gaf.gz") == 0) return IS_GAF;
 
     htsFile *fp = hts_open(fname,"r");
     if (!fp) {
@@ -199,41 +206,70 @@ static char **parse_regions(char *regions_fname, char **argv, int argc, int *nre
 static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **regs, int nregs)
 {
     int i;
+    htsThreadPool tpool = {NULL, 0};
     htsFile *fp = hts_open(fname,"r");
     if ( !fp ) error_errno("Could not open \"%s\"", fname);
     enum htsExactFormat format = hts_get_format(fp)->format;
-
     if (args->cache_megs)
         hts_set_cache_size(fp, args->cache_megs * 1048576);
 
+    //set threads if needed, errors are logged and ignored
+    if (args->threads >= 1) {
+        if (!(tpool.pool = hts_tpool_init(args->threads))) {
+            hts_log_info("Could not initialize thread pool!");
+        }
+        if (hts_set_thread_pool(fp, &tpool) < 0) {
+            hts_log_info("Could not set thread pool!");
+        }
+    }
+
     regidx_t *reg_idx = NULL;
     if ( args->targets_fname )
     {
         reg_idx = regidx_init(args->targets_fname, NULL, NULL, 0, NULL);
-        if (!reg_idx)
+        if (!reg_idx) {
+            RELEASE_TPOOL(tpool.pool);
             error_errno("Could not build region list for \"%s\"",
                         args->targets_fname);
+        }
     }
 
     if ( format == bcf )
     {
         htsFile *out = hts_open("-","w");
-        if ( !out ) error_errno("Could not open stdout");
+        if ( !out ) {
+            RELEASE_TPOOL(tpool.pool);
+            error_errno("Could not open stdout");
+        }
+        if (hts_set_thread_pool(out, &tpool) < 0) {
+            hts_log_info("Could not set thread pool to output file!");
+        }
         hts_idx_t *idx = bcf_index_load3(fname, NULL, args->download_index ? HTS_IDX_SAVE_REMOTE : 0);
-        if ( !idx ) error_errno("Could not load .csi index of \"%s\"", fname);
+        if ( !idx ) {
+            RELEASE_TPOOL(tpool.pool);
+            error_errno("Could not load .csi index of \"%s\"", fname);
+        }
 
         bcf_hdr_t *hdr = bcf_hdr_read(fp);
-        if ( !hdr ) error_errno("Could not read the header from \"%s\"", fname);
+        if ( !hdr ) {
+            RELEASE_TPOOL(tpool.pool);
+            error_errno("Could not read the header from \"%s\"", fname);
+        }
 
         if ( args->print_header ) {
-            if ( bcf_hdr_write(out,hdr)!=0 )
+            if ( bcf_hdr_write(out,hdr)!=0 ) {
+                RELEASE_TPOOL(tpool.pool);
                 error_errno("Failed to write to stdout");
+            }
         }
         if ( !args->header_only )
         {
             assert(regs != NULL);
             bcf1_t *rec = bcf_init();
-            if (!rec) error_errno(NULL);
+            if (!rec) {
+                RELEASE_TPOOL(tpool.pool);
+                error_errno(NULL);
+            }
             for (i=0; i<nregs; i++)
             {
                 int ret, found = 0;
@@ -245,6 +281,7 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
                     {
                         const char *chr = bcf_seqname(hdr,rec);
                         if (!chr) {
+                            RELEASE_TPOOL(tpool.pool);
                             error("Bad BCF record in \"%s\" : "
                                   "Invalid CONTIG id %d\n",
                                   fname, rec->rid);
@@ -256,19 +293,23 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
                         found = 1;
                     }
                     if ( bcf_write(out,hdr,rec)!=0 ) {
+                        RELEASE_TPOOL(tpool.pool);
                         error_errno("Failed to write to stdout");
                     }
                 }
 
                 if (ret < -1) {
+                    RELEASE_TPOOL(tpool.pool);
                     error_errno("Reading \"%s\" failed", fname);
                 }
                 bcf_itr_destroy(itr);
             }
             bcf_destroy(rec);
         }
-        if ( hts_close(out) )
+        if ( hts_close(out) ) {
+            RELEASE_TPOOL(tpool.pool);
             error_errno("hts_close returned non-zero status for stdout");
+        }
 
         bcf_hdr_destroy(hdr);
         hts_idx_destroy(idx);
@@ -276,7 +317,10 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
     else if ( format==vcf || format==sam || format==bed || format==text_format || format==unknown_format )
     {
         tbx_t *tbx = tbx_index_load3(fname, NULL, args->download_index ? HTS_IDX_SAVE_REMOTE : 0);
-        if ( !tbx ) error_errno("Could not load .tbi/.csi index of %s", fname);
+        if ( !tbx ) {
+            RELEASE_TPOOL(tpool.pool);
+            error_errno("Could not load .tbi/.csi index of %s", fname);
+        }
         kstring_t str = {0,0,0};
         if ( args->print_header )
         {
@@ -284,10 +328,15 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
             while ((ret = hts_getline(fp, KS_SEP_LINE, &str)) >= 0)
             {
                 if ( !str.l || str.s[0]!=tbx->conf.meta_char ) break;
-                if (puts(str.s) < 0)
+                if (puts(str.s) < 0) {
+                    RELEASE_TPOOL(tpool.pool);
                     error_errno("Error writing to stdout");
+                }
+            }
+            if (ret < -1) {
+                RELEASE_TPOOL(tpool.pool);
+                error_errno("Reading \"%s\" failed", fname);
             }
-            if (ret < -1) error_errno("Reading \"%s\" failed", fname);
         }
         if ( !args->header_only )
         {
@@ -295,7 +344,10 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
             const char **seq = NULL;
             if ( reg_idx ) {
                 seq = tbx_seqnames(tbx, &nseq);
-                if (!seq) error_errno("Failed to get sequence names list");
+                if (!seq) {
+                    RELEASE_TPOOL(tpool.pool);
+                    error_errno("Failed to get sequence names list");
+                }
             }
             for (i=0; i<nregs; i++)
             {
@@ -309,10 +361,15 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
                         if (args->separate_regs) printf("%c%s\n", conf->meta_char, regs[i]);
                         found = 1;
                     }
-                    if (puts(str.s) < 0)
+                    if (puts(str.s) < 0) {
+                        RELEASE_TPOOL(tpool.pool);
                         error_errno("Failed to write to stdout");
+                    }
+                }
+                if (ret < -1) {
+                    RELEASE_TPOOL(tpool.pool);
+                    error_errno("Reading \"%s\" failed", fname);
                 }
-                if (ret < -1) error_errno("Reading \"%s\" failed", fname);
                 tbx_itr_destroy(itr);
             }
             free(seq);
@@ -320,15 +377,20 @@ static int query_regions(args_t *args, tbx_conf_t *conf, char *fname, char **reg
         free(str.s);
         tbx_destroy(tbx);
     }
-    else if ( format==bam )
+    else if ( format==bam ) {
+        RELEASE_TPOOL(tpool.pool);
         error("Please use \"samtools view\" for querying BAM files.\n");
+    }
 
     if ( reg_idx ) regidx_destroy(reg_idx);
-    if ( hts_close(fp) )
+    if ( hts_close(fp) ) {
+        RELEASE_TPOOL(tpool.pool);
         error_errno("hts_close returned non-zero status: %s", fname);
+    }
 
     for (i=0; i<nregs; i++) free(regs[i]);
     free(regs);
+    RELEASE_TPOOL(tpool.pool);
     return 0;
 }
 static int query_chroms(char *fname, int download)
@@ -372,12 +434,28 @@ static int query_chroms(char *fname, int download)
     return 0;
 }
 
-int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf)
+int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *conf, int threads)
 {
+    hts_tpool *tpool = NULL;
+    if (threads >= 1) {
+        if (!(tpool = hts_tpool_init(threads))) {
+            hts_log_info("Could not initialize thread pool!");
+        }
+    }
     if ( ftype & IS_TXT || !ftype )
     {
         BGZF *fp = bgzf_open(fname,"r");
-        if ( !fp || bgzf_read_block(fp) != 0 || !fp->block_length ) return -1;
+        if (!fp) {
+            RELEASE_TPOOL(tpool);
+            return -1;
+        }
+        if (bgzf_thread_pool(fp, tpool, 0) < 0) {
+            hts_log_info("Could not set thread pool!");
+        }
+        if (bgzf_read_block(fp) != 0 || !fp->block_length ) {
+            RELEASE_TPOOL(tpool);
+            return -1;
+        }
 
         char *buffer = fp->uncompressed_block;
         int skip_until = 0;
@@ -393,7 +471,10 @@ int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *
                     skip_until++;
                     if ( skip_until>=fp->block_length )
                     {
-                        if ( bgzf_read_block(fp) != 0 || !fp->block_length ) error("FIXME: No body in the file: %s\n", fname);
+                        if ( bgzf_read_block(fp) != 0 || !fp->block_length ) {
+                            RELEASE_TPOOL(tpool);
+                            error("FIXME: No body in the file: %s\n", fname);
+                        }
                         skip_until = 0;
                     }
                     // The header has finished
@@ -402,7 +483,10 @@ int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *
                 skip_until++;
                 if ( skip_until>=fp->block_length )
                 {
-                    if (bgzf_read_block(fp) != 0 || !fp->block_length) error("FIXME: No body in the file: %s\n", fname);
+                    if (bgzf_read_block(fp) != 0 || !fp->block_length) {
+                        RELEASE_TPOOL(tpool);
+                        error("FIXME: No body in the file: %s\n", fname);
+                    }
                     skip_until = 0;
                 }
             }
@@ -410,31 +494,55 @@ int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *
 
         // Output the new header
         FILE *hdr  = fopen(header,"r");
-        if ( !hdr ) error("%s: %s", header,strerror(errno));
+        if ( !hdr ) {
+            RELEASE_TPOOL(tpool);
+            error("%s: %s", header,strerror(errno));
+        }
         const size_t page_size = 32768;
         char *buf = malloc(page_size);
         BGZF *bgzf_out = bgzf_open("-", "w");
         ssize_t nread;
 
-        if (!buf) error("%s\n", strerror(errno));
-        if (!bgzf_out)
+        if (!buf) {
+            RELEASE_TPOOL(tpool);
+            error("%s\n", strerror(errno));
+        }
+        if (!bgzf_out) {
+            RELEASE_TPOOL(tpool);
             error_errno("Couldn't open output stream");
+        }
+        if (bgzf_thread_pool(bgzf_out, tpool, 0) < 0) {
+            hts_log_info("Could not set thread pool to output file!");
+        }
         while ( (nread=fread(buf,1,page_size-1,hdr))>0 )
         {
             if ( nread<page_size-1 && buf[nread-1]!='\n' ) buf[nread++] = '\n';
-            if (bgzf_write(bgzf_out, buf, nread) < 0)
+            if (bgzf_write(bgzf_out, buf, nread) < 0) {
+                RELEASE_TPOOL(tpool);
                 error_errno("Write error %d", bgzf_out->errcode);
+            }
+        }
+        if ( ferror(hdr) ) {
+            RELEASE_TPOOL(tpool);
+            error_errno("Failed to read \"%s\"", header);
+        }
+        if ( fclose(hdr) ) {
+            RELEASE_TPOOL(tpool);
+            error_errno("Closing \"%s\" failed", header);
         }
-        if ( ferror(hdr) ) error_errno("Failed to read \"%s\"", header);
-        if ( fclose(hdr) ) error_errno("Closing \"%s\" failed", header);
 
         // Output all remaining data read with the header block
         if ( fp->block_length - skip_until > 0 )
         {
-            if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) error_errno("Write error %d",fp->errcode);
+            if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) {
+                RELEASE_TPOOL(tpool);
+                error_errno("Write error %d",fp->errcode);
+            }
         }
-        if (bgzf_flush(bgzf_out) < 0)
+        if (bgzf_flush(bgzf_out) < 0) {
+            RELEASE_TPOOL(tpool);
             error_errno("Write error %d", bgzf_out->errcode);
+        }
 
         while (1)
         {
@@ -442,17 +550,30 @@ int reheader_file(const char *fname, const char *header, int ftype, tbx_conf_t *
             if ( nread<=0 ) break;
 
             int count = bgzf_raw_write(bgzf_out, buf, nread);
-            if (count != nread) error_errno("Write failed, wrote %d instead of %d bytes", count,(int)nread);
+            if (count != nread) {
+                RELEASE_TPOOL(tpool);
+                error_errno("Write failed, wrote %d instead of %d bytes", count,(int)nread);
+            }
+        }
+        if (nread < 0) {
+            RELEASE_TPOOL(tpool);
+            error_errno("Error reading \"%s\"", fname);
         }
-        if (nread < 0) error_errno("Error reading \"%s\"", fname);
-        if (bgzf_close(bgzf_out) < 0)
+        if (bgzf_close(bgzf_out) < 0) {
+            RELEASE_TPOOL(tpool);
             error_errno("Error %d closing output", bgzf_out->errcode);
-        if (bgzf_close(fp) < 0)
+        }
+        if (bgzf_close(fp) < 0) {
+            RELEASE_TPOOL(tpool);
             error_errno("Error %d closing \"%s\"", bgzf_out->errcode, fname);
+        }
         free(buf);
     }
-    else
+    else {
+        RELEASE_TPOOL(tpool);
         error("todo: reheader BCF, BAM\n");  // BCF is difficult, records contain pointers to the header.
+    }
+    RELEASE_TPOOL(tpool);
     return 0;
 }
 
@@ -470,7 +591,7 @@ static int usage(FILE *fp, int status)
     fprintf(fp, "   -e, --end INT              column number for region end (if no end, set INT to -b) [5]\n");
     fprintf(fp, "   -f, --force                overwrite existing index without asking\n");
     fprintf(fp, "   -m, --min-shift INT        set minimal interval size for CSI indices to 2^INT [14]\n");
-    fprintf(fp, "   -p, --preset STR           gff, bed, sam, vcf\n");
+    fprintf(fp, "   -p, --preset STR           gff, bed, sam, vcf, gaf\n");
     fprintf(fp, "   -s, --sequence INT         column number for sequence names (suppressed by -p) [1]\n");
     fprintf(fp, "   -S, --skip-lines INT       skip first INT lines [0]\n");
     fprintf(fp, "\n");
@@ -485,6 +606,7 @@ static int usage(FILE *fp, int status)
     fprintf(fp, "       --cache INT            set cache size to INT megabytes (0 disables) [10]\n");
     fprintf(fp, "       --separate-regions     separate the output by corresponding regions\n");
     fprintf(fp, "       --verbosity INT        set verbosity [3]\n");
+    fprintf(fp, "   -@, --threads INT          number of additional threads to use [0]\n");
     fprintf(fp, "\n");
     return status;
 }
@@ -523,11 +645,12 @@ int main(int argc, char *argv[])
         {"verbosity", required_argument, NULL, 3},
         {"cache", required_argument, NULL, 4},
         {"separate-regions", no_argument, NULL, 5},
+        {"threads", required_argument, NULL, '@'},
         {NULL, 0, NULL, 0}
     };
 
     char *tmp;
-    while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:D", loptions,NULL)) >= 0)
+    while ((c = getopt_long(argc, argv, "hH?0b:c:e:fm:p:s:S:lr:CR:T:D@:", loptions,NULL)) >= 0)
     {
         switch (c)
         {
@@ -561,6 +684,7 @@ int main(int argc, char *argv[])
                 else if (strcmp(optarg, "bed") == 0) conf = tbx_conf_bed;
                 else if (strcmp(optarg, "sam") == 0) conf = tbx_conf_sam;
                 else if (strcmp(optarg, "vcf") == 0) conf = tbx_conf_vcf;
+                else if (strcmp(optarg, "gaf") == 0) conf = tbx_conf_gaf;
                 else if (strcmp(optarg, "bcf") == 0) detect = 1; // bcf is autodetected, preset is not needed
                 else if (strcmp(optarg, "bam") == 0) detect = 1; // same as bcf
                 else error("The preset string not recognised: '%s'\n", optarg);
@@ -602,6 +726,9 @@ int main(int argc, char *argv[])
             case 5:
                 args.separate_regs = 1;
                 break;
+            case '@':   //thread count
+                args.threads = atoi(optarg);
+                break;
             default: return usage(stderr, EXIT_FAILURE);
         }
     }
@@ -620,6 +747,7 @@ int main(int argc, char *argv[])
     {
         if ( ftype==IS_GFF ) conf = tbx_conf_gff;
         else if ( ftype==IS_BED ) conf = tbx_conf_bed;
+        else if ( ftype==IS_GAF ) conf = tbx_conf_gaf;
         else if ( ftype==IS_SAM ) conf = tbx_conf_sam;
         else if ( ftype==IS_VCF )
         {
@@ -651,7 +779,7 @@ int main(int argc, char *argv[])
     if ( min_shift!=0 && !do_csi ) do_csi = 1;
 
     if ( reheader )
-        return reheader_file(fname, reheader, ftype, &conf);
+        return reheader_file(fname, reheader, ftype, &conf, args.threads);
 
     char *suffix = ".tbi";
     if ( do_csi ) suffix = ".csi";
@@ -677,42 +805,42 @@ int main(int argc, char *argv[])
     int ret;
     if ( ftype==IS_CRAM )
     {
-        if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
+        if ( bam_index_build3(fname, min_shift, args.threads)!=0 ) error("bam_index_build failed: %s\n", fname);
         return 0;
     }
     else if ( do_csi )
     {
         if ( ftype==IS_BCF )
         {
-            if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\n", fname);
+            if ( bcf_index_build3(fname, NULL, min_shift, args.threads)!=0 ) error("bcf_index_build failed: %s\n", fname);
             return 0;
         }
         if ( ftype==IS_BAM )
         {
-            if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\n", fname);
+            if ( bam_index_build3(fname, min_shift, args.threads)!=0 ) error("bam_index_build failed: %s\n", fname);
             return 0;
         }
 
-        switch (ret = tbx_index_build(fname, min_shift, &conf))
+        switch (ret = tbx_index_build3(fname, NULL, min_shift, args.threads, &conf))
         {
             case 0:
                 return 0;
             case -2:
                 error("[tabix] the compression of '%s' is not BGZF\n", fname);
             default:
-                error("tbx_index_build failed: %s\n", fname);
+                error("tbx_index_build3 failed: %s\n", fname);
         }
     }
     else    // TBI index
     {
-        switch (ret = tbx_index_build(fname, min_shift, &conf))
+        switch (ret = tbx_index_build3(fname, NULL, min_shift, args.threads, &conf))
         {
             case 0:
                 return 0;
             case -2:
                 error("[tabix] the compression of '%s' is not BGZF\n", fname);
             default:
-                error("tbx_index_build failed: %s\n", fname);
+                error("tbx_index_build3 failed: %s\n", fname);
         }
     }
 
diff --git a/tbx.c b/tbx.c
index c2c5c6f9d..5f861299a 100644
--- a/tbx.c
+++ b/tbx.c
@@ -53,6 +53,7 @@ const tbx_conf_t tbx_conf_sam = { TBX_SAM, 3, 4, 0, '@', 0 };
 
 HTSLIB_EXPORT
 const tbx_conf_t tbx_conf_vcf = { TBX_VCF, 1, 2, 0, '#', 0 };
+const tbx_conf_t tbx_conf_gaf = { TBX_GAF, 1, 6, 0, '#', 0 };
 
 typedef struct {
     int64_t beg, end;
@@ -64,6 +65,7 @@ static inline int get_tid(tbx_t *tbx, const char *ss, int is_add)
 {
     khint_t k;
     khash_t(s2i) *d;
+    if ((tbx->conf.preset&0xffff) == TBX_GAF) return(0);
     if (tbx->dict == 0) tbx->dict = kh_init(s2i);
     if (!tbx->dict) return -1; // Out of memory
     d = (khash_t(s2i)*)tbx->dict;
@@ -103,24 +105,45 @@ int tbx_parse1(const tbx_conf_t *conf, size_t len, char *line, tbx_intv_t *intv)
                 intv->ss = line + b; intv->se = line + i;
             } else if (id == conf->bc) {
                 // here ->beg is 0-based.
-                intv->beg = strtoll(line + b, &s, 0);
+                if ((conf->preset&0xffff) == TBX_GAF){
+                    // if gaf find the smallest and largest node id
+                    char *t;
+                    int64_t nodeid = -1;
+                    for (s = line + b + 1; s < line + i;) {
+                        nodeid = strtoll(s, &t, 0);
+                        if(intv->beg == -1){
+                            intv->beg = intv->end = nodeid;
+                        } else {
+                            if(nodeid < intv->beg){
+                                intv->beg = nodeid;
+                            }
 
-                if (conf->bc <= conf->ec) // don't overwrite an already set end point
-                    intv->end = intv->beg;
+                            if(nodeid > intv->end){
+                                intv->end = nodeid;
+                            }
+                        }
+                        s = t + 1;
+                    }
+                } else {
+                    intv->beg = strtoll(line + b, &s, 0);
 
-                if ( s==line+b ) return -1; // expected int
+                    if (conf->bc <= conf->ec) // don't overwrite an already set end point
+                        intv->end = intv->beg;
 
-                if (!(conf->preset&TBX_UCSC))
-                    --intv->beg;
-                else if (conf->bc <= conf->ec)
-                    ++intv->end;
+                    if ( s==line+b ) return -1; // expected int
 
-                if (intv->beg < 0) {
-                    hts_log_warning("Coordinate <= 0 detected. "
-                                    "Did you forget to use the -0 option?");
-                    intv->beg = 0;
+                    if (!(conf->preset&TBX_UCSC))
+                        --intv->beg;
+                    else if (conf->bc <= conf->ec)
+                        ++intv->end;
+
+                    if (intv->beg < 0) {
+                        hts_log_warning("Coordinate <= 0 detected. "
+                                        "Did you forget to use the -0 option?");
+                        intv->beg = 0;
+                    }
+                    if (intv->end < 1) intv->end = 1;
                 }
-                if (intv->end < 1) intv->end = 1;
             } else {
                 if ((conf->preset&0xffff) == TBX_GENERIC) {
                     if (id == conf->ec)
@@ -187,7 +210,13 @@ static inline int get_intv(tbx_t *tbx, kstring_t *str, tbx_intv_t *intv, int is_
 {
     if (tbx_parse1(&tbx->conf, str->l, str->s, intv) == 0) {
         int c = *intv->se;
-        *intv->se = '\0'; intv->tid = get_tid(tbx, intv->ss, is_add); *intv->se = c;
+        *intv->se = '\0';
+        if ((tbx->conf.preset&0xffff) == TBX_GAF){
+            intv->tid = 0;
+        } else {
+            intv->tid = get_tid(tbx, intv->ss, is_add);
+        }
+        *intv->se = c;
         if (intv->tid < 0) return -2;  // get_tid out of memory
         return (intv->beg >= 0 && intv->end >= 0)? 0 : -1;
     } else {
@@ -196,6 +225,7 @@ static inline int get_intv(tbx_t *tbx, kstring_t *str, tbx_intv_t *intv, int is_
         {
             case TBX_SAM: type = "TBX_SAM"; break;
             case TBX_VCF: type = "TBX_VCF"; break;
+            case TBX_GAF: type = "TBX_GAF"; break;
             case TBX_UCSC: type = "TBX_UCSC"; break;
             default: type = "TBX_GENERIC"; break;
         }
diff --git a/test/base_mods/MM-bounds+.sam b/test/base_mods/MM-bounds+.sam
new file mode 100644
index 000000000..03a112dab
--- /dev/null
+++ b/test/base_mods/MM-bounds+.sam
@@ -0,0 +1,2 @@
+@SQ	SN:I	LN:999
+r1	0	I	1	0	36M	*	0	0	AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA	DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A	Mm:Z:C+m,2,2,1,4,1,0;C+h,6,7;N+n,15,2;	Ml:B:C,128,153,179,204,230,0,159,6,215,240
diff --git a/test/base_mods/MM-bounds-.sam b/test/base_mods/MM-bounds-.sam
new file mode 100644
index 000000000..3f54798fe
--- /dev/null
+++ b/test/base_mods/MM-bounds-.sam
@@ -0,0 +1,2 @@
+@SQ	SN:I	LN:999
+r1-	16	I	1	0	36M	*	0	0	AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA	DF?GCH88.EG8.7@E9G8A?H9.:C?8,@,,9F@A	Mm:Z:G-m,0,1,4,1,2,2;G-h,0,7;N-n,17,2;	Ml:B:C,230,204,179,153,128,0,6,159,240,215
diff --git a/test/base_mods/MM-chebi.sam b/test/base_mods/MM-chebi.sam
index 0ec8b9ddb..475a7d599 100644
--- a/test/base_mods/MM-chebi.sam
+++ b/test/base_mods/MM-chebi.sam
@@ -1,2 +1,2 @@
 @CO	Separate m, h and N modifications
-*	0	*	0	0	*	*	0	0	AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA	*	Mm:Z:C+m,2,2,1,4,1;C+76792,6,7;N+n,15;	Ml:B:C,102,128,153,179,204,161,33,212,169
+*	0	*	0	0	*	*	0	0	AGCTCTCCAGAGTCGNACGCCATYCGCGCGCCACCA	*	Mm:Z:C+m,2,2,1,4,1;C+76792,6,7;N+n,15;	Ml:B:C,102,128,153,179,204,161,33,212
diff --git a/test/base_mods/base-mods.tst b/test/base_mods/base-mods.tst
index 889c3780e..5899c80a0 100644
--- a/test/base_mods/base-mods.tst
+++ b/test/base_mods/base-mods.tst
@@ -57,3 +57,7 @@ P MM-pileup2.out $pileup_mod < MM-pileup2.sam
 P MM-pileup.out $pileup_mod < MM-MNp.sam
 N MM-pileup.out $pileup_mod < MM-MNf1.sam
 N MM-pileup.out $pileup_mod < MM-MNf2.sam
+N MM-pileup.out $test_mod < MM-MNf1.sam
+N MM-pileup.out $test_mod < MM-MNf2.sam
+N MM-pileup.out $test_mod < MM-bounds+.sam
+N MM-pileup.out $test_mod < MM-bounds-.sam
diff --git a/test/index.bcf.csi b/test/index.bcf.csi
index dbebfe13a..13d0e9f17 100644
Binary files a/test/index.bcf.csi and b/test/index.bcf.csi differ
diff --git a/test/mpileup/c1#pad1.out b/test/mpileup/c1#pad1.out
index cbac51ad8..eda860011 100644
--- a/test/mpileup/c1#pad1.out
+++ b/test/mpileup/c1#pad1.out
@@ -1,10 +1,10 @@
-c1	1	9	^!A^!A^!A^!A^!A^!A^!A^!A^!A
-c1	2	9	AAAAAAAAA-3()
-c1	3	9	CCCCCCCC*
-c1	4	9	CCCCCCCC-1()*
-c1	5	9	GGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()
-c1	6	9	CCCCCCC**
-c1	7	9	GGGGGGGG*
-c1	8	9	GGGGGGGG*
-c1	9	9	TTTTTTTTT
-c1	10	9	T$T$T$T$T$T$T$T$T$
+c1	1	9	^!A^!A^!A^!A^!A^!A^!A^!A^!A	~~~~~~~~~
+c1	2	9	AAAAAAAAA-3()	~~~~~~~~~
+c1	3	9	CCCCCCCC*	~~~~~~~~~
+c1	4	9	CCCCCCCC-1()*	~~~~~~~~~
+c1	5	9	GGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()	~~~~~~~~~
+c1	6	9	CCCCCCC**	~~~~~~~~~
+c1	7	9	GGGGGGGG*	~~~~~~~~~
+c1	8	9	GGGGGGGG*	~~~~~~~~~
+c1	9	9	TTTTTTTTT	~~~~~~~~~
+c1	10	9	T$T$T$T$T$T$T$T$T$	~~~~~~~~~
diff --git a/test/mpileup/c1#pad2.out b/test/mpileup/c1#pad2.out
index 9cab78a87..7bab80ec2 100644
--- a/test/mpileup/c1#pad2.out
+++ b/test/mpileup/c1#pad2.out
@@ -1,10 +1,10 @@
-c1	1	12	^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!*
-c1	2	12	AAAAAAAAAA-3()A*
-c1	3	12	CCCCCCCCC*C*
-c1	4	12	CCCCCCCCC-1()*C-2()*
-c1	5	13	GGGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()**+6(**TA**)-5()^!G+6(**TA**)$
-c1	6	12	CCCCCCCC****
-c1	7	12	GGGGGGGGG*G*
-c1	8	12	GGGGGGGGG*G*
-c1	9	12	TTTTTTTTTTT*
-c1	10	12	T$T$T$T$T$T$T$T$T$T$T$*$
+c1	1	12	^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!A^!*	~~~~~~~~~~~~
+c1	2	12	AAAAAAAAAA-3()A*	~~~~~~~~~~~~
+c1	3	12	CCCCCCCCC*C*	~~~~~~~~~~~~
+c1	4	12	CCCCCCCCC-1()*C-2()*	~~~~~~~~~~~~
+c1	5	13	GGGGG+6(GTTAAC)G+6(*TTAA*)G+6(GTT***)G+6(***AAC)*+6(**TA**)-1()*+6(GTTAAC)-3()**+6(**TA**)-5()^!G+6(**TA**)$	~~~~~~~~~~~~~
+c1	6	12	CCCCCCCC****	~~~~~~~~~~~~
+c1	7	12	GGGGGGGGG*G*	~~~~~~~~~~~~
+c1	8	12	GGGGGGGGG*G*	~~~~~~~~~~~~
+c1	9	12	TTTTTTTTTTT*	~~~~~~~~~~~~
+c1	10	12	T$T$T$T$T$T$T$T$T$T$T$*$	~~~~~~~~~~~~
diff --git a/test/mpileup/c1#pad3.out b/test/mpileup/c1#pad3.out
index d56eae596..e0ce41871 100644
--- a/test/mpileup/c1#pad3.out
+++ b/test/mpileup/c1#pad3.out
@@ -1,5 +1,5 @@
-c1	6	11	^!C^!C^!C^!C^!C^!C^!C^!C^!*^!*^!*
-c1	7	11	GGGGGGGGG*G
-c1	8	11	GGGGGGGGG*G
-c1	9	11	TTTTTTTTTTT
-c1	10	11	T$T$T$T$T$T$T$T$T$T$T$
+c1	6	11	^!C^!C^!C^!C^!C^!C^!C^!C^!*^!*^!*	~~~~~~~~~~~
+c1	7	11	GGGGGGGGG*G	~~~~~~~~~~~
+c1	8	11	GGGGGGGGG*G	~~~~~~~~~~~
+c1	9	11	TTTTTTTTTTT	~~~~~~~~~~~
+c1	10	11	T$T$T$T$T$T$T$T$T$T$T$	~~~~~~~~~~~
diff --git a/test/mpileup/mp_D.out b/test/mpileup/mp_D.out
index 656cedb5d..72f1c433e 100644
--- a/test/mpileup/mp_D.out
+++ b/test/mpileup/mp_D.out
@@ -1,11 +1,11 @@
-z	2	3	^!A^!A^!*
-z	3	3	GG*
-z	4	3	CCC
-z	5	3	TT-3()T
-z	6	3	T*T
-z	7	3	A*A
-z	8	3	G*G
-z	9	3	CCC
-z	10	3	AAA-2()
-z	11	3	GG*
-z	12	3	G$G$*$
+z	2	3	^!A^!A^!*	002
+z	3	3	GG*	112
+z	4	3	CCC	222
+z	5	3	TT-3()T	333
+z	6	3	T*T	474
+z	7	3	A*A	575
+z	8	3	G*G	676
+z	9	3	CCC	777
+z	10	3	AAA-2()	888
+z	11	3	GG*	99~
+z	12	3	G$G$*$	00~
diff --git a/test/mpileup/mp_DI.out b/test/mpileup/mp_DI.out
index 8a2ff2e2a..5818c16ff 100644
--- a/test/mpileup/mp_DI.out
+++ b/test/mpileup/mp_DI.out
@@ -1,12 +1,12 @@
-z	2	5	^!A^!A^!A^!*^!*
-z	3	5	GGG*+2(AA)*+2(*A)
-z	4	5	CCCCC
-z	5	5	TTTTT
-z	6	5	TTTTT
-z	7	5	AAAAA
-z	8	5	G-2()G-2()G-2()GG
-z	9	5	***CC
-z	10	5	*+2(TT)*+2(TT)$*+2(*T)$AA
-z	11	3	GGG
-z	12	3	GG$G$
-z	13	1	C$
+z	2	5	^!A^!A^!A^!*^!*	000AB
+z	3	5	GGG*+2(AA)*+2(*A)	111AB
+z	4	5	CCCCC	22222
+z	5	5	TTTTT	33333
+z	6	5	TTTTT	44444
+z	7	5	AAAAA	55555
+z	8	5	G-2()G-2()G-2()GG	66666
+z	9	5	***CC	AAB77
+z	10	5	*+2(TT)*+2(TT)$*+2(*T)$AA	AAB88
+z	11	3	GGG	999
+z	12	3	GG$G$	000
+z	13	1	C$	1
diff --git a/test/mpileup/mp_I.out b/test/mpileup/mp_I.out
index c437a49d9..d62e9928c 100644
--- a/test/mpileup/mp_I.out
+++ b/test/mpileup/mp_I.out
@@ -1,11 +1,11 @@
-z	2	3	^!A^!A^!A
-z	3	3	GGG
-z	4	3	CCC
-z	5	3	TT+3(CCC)T
-z	6	3	TTT
-z	7	3	AAA
-z	8	3	GGG
-z	9	3	CCC
-z	10	3	AAA
-z	11	3	GGG
-z	12	3	G$G$G+2(=A)$
+z	2	3	^!A^!A^!A	000
+z	3	3	GGG	111
+z	4	3	CCC	222
+z	5	3	TT+3(CCC)T	333
+z	6	3	TTT	444
+z	7	3	AAA	555
+z	8	3	GGG	666
+z	9	3	CCC	777
+z	10	3	AAA	888
+z	11	3	GGG	999
+z	12	3	G$G$G+2(=A)$	000
diff --git a/test/mpileup/mp_ID.out b/test/mpileup/mp_ID.out
index 4f88f51e0..4f83ef4b7 100644
--- a/test/mpileup/mp_ID.out
+++ b/test/mpileup/mp_ID.out
@@ -1,12 +1,12 @@
-z	2	3	^!A^!A^!A
-z	3	3	GGG
-z	4	5	CCC^!*^!*
-z	5	5	TTT**
-z	6	5	TTTTT
-z	7	5	AAAAA
-z	8	5	G+2(TT)-2()G+2(TT)-2()G+2(T*)-2()GG
-z	9	5	***CC
-z	10	5	**$*$AA
-z	11	3	GGG
-z	12	3	GG$G$
-z	13	1	C$
+z	2	3	^!A^!A^!A	000
+z	3	3	GGG	111
+z	4	5	CCC^!*^!*	22244
+z	5	5	TTT**	33344
+z	6	5	TTTTT	44444
+z	7	5	AAAAA	55555
+z	8	5	G+2(TT)-2()G+2(TT)-2()G+2(T*)-2()GG	66666
+z	9	5	***CC	9~~77
+z	10	5	**$*$AA	9~~88
+z	11	3	GGG	999
+z	12	3	GG$G$	000
+z	13	1	C$	1
diff --git a/test/mpileup/mp_N.out b/test/mpileup/mp_N.out
index 695e4634c..0b8dede38 100644
--- a/test/mpileup/mp_N.out
+++ b/test/mpileup/mp_N.out
@@ -1,11 +1,11 @@
-z	2	3	^!A^!A^!>
-z	3	3	GG>
-z	4	3	CCC
-z	5	3	TTT
-z	6	3	T>T
-z	7	3	A>A
-z	8	3	G>G
-z	9	3	CCC
-z	10	3	AAA
-z	11	3	GG>
-z	12	3	G$G$>$
+z	2	3	^!A^!A^!>	002
+z	3	3	GG>	112
+z	4	3	CCC	222
+z	5	3	TTT	333
+z	6	3	T>T	474
+z	7	3	A>A	575
+z	8	3	G>G	676
+z	9	3	CCC	777
+z	10	3	AAA	888
+z	11	3	GG>	99~
+z	12	3	G$G$>$	00~
diff --git a/test/mpileup/mp_N2.out b/test/mpileup/mp_N2.out
index baf168364..5ade66a5e 100644
--- a/test/mpileup/mp_N2.out
+++ b/test/mpileup/mp_N2.out
@@ -1,13 +1,13 @@
-z	1	6	^!T^!T^!T^!T^!T^!T
-z	2	6	AAAAAA
-z	3	6	GGGGGG
-z	4	6	C+2(AA)-5()C+2(A*)-5()C+2(*A)-5()C+2(AA)C+2(A*)C+2(*A)
-z	5	6	***>>>
-z	6	6	***>>>
-z	7	6	***>>>
-z	8	6	***>>>
-z	9	6	*+2(TT)*+2(*T)*+2(T*)>+2(TT)>+2(*T)>+2(T*)
-z	10	6	AAAAAA
-z	11	6	GGGGGG
-z	12	6	GGGGGG
-z	13	6	T$T$T$T$T$T$
+z	1	6	^!T^!T^!T^!T^!T^!T	AAAAAA
+z	2	6	AAAAAA	BBBBBB
+z	3	6	GGGGGG	CCCCCC
+z	4	6	C+2(AA)-5()C+2(A*)-5()C+2(*A)-5()C+2(AA)C+2(A*)C+2(*A)	DDDDDD
+z	5	6	***>>>	GHGGHG
+z	6	6	***>>>	GHGGHG
+z	7	6	***>>>	GHGGHG
+z	8	6	***>>>	GHGGHG
+z	9	6	*+2(TT)*+2(*T)*+2(T*)>+2(TT)>+2(*T)>+2(T*)	GHGGHG
+z	10	6	AAAAAA	IIIIII
+z	11	6	GGGGGG	JJJJJJ
+z	12	6	GGGGGG	KKKKKK
+z	13	6	T$T$T$T$T$T$	LLLLLL
diff --git a/test/mpileup/mp_P.out b/test/mpileup/mp_P.out
index 471b7d7fc..003bed0d2 100644
--- a/test/mpileup/mp_P.out
+++ b/test/mpileup/mp_P.out
@@ -1,10 +1,10 @@
-z	2	5	^!A^!A^!A^!A^!A
-z	3	5	GGGGG
-z	4	5	CCCCC
-z	5	5	TTTTT
-z	6	5	TT+4(GGCC)T+4(GG**)T+4(*GC*)T+4(**CC)
-z	7	5	AAAAA
-z	8	5	GGGGG
-z	9	5	CCCCC
-z	10	5	AAAAA
-z	11	5	G$G$G$G$G$
+z	2	5	^!A^!A^!A^!A^!A	00000
+z	3	5	GGGGG	11111
+z	4	5	CCCCC	22222
+z	5	5	TTTTT	33333
+z	6	5	TT+4(GGCC)T+4(GG**)T+4(*GC*)T+4(**CC)	44444
+z	7	5	AAAAA	55555
+z	8	5	GGGGG	66666
+z	9	5	CCCCC	77777
+z	10	5	AAAAA	88888
+z	11	5	G$G$G$G$G$	99999
diff --git a/test/mpileup/mp_overlap1.out b/test/mpileup/mp_overlap1.out
new file mode 100644
index 000000000..56d70b0dd
--- /dev/null
+++ b/test/mpileup/mp_overlap1.out
@@ -0,0 +1,12 @@
+1	100003	2	^St^+T	{!
+1	100004	2	aA-5()	{!
+1	100005	2	a*	N!
+1	100006	2	g*	N!
+1	100007	2	c*	N!
+1	100008	2	a*	N!
+1	100009	2	c*	N!
+1	100010	2	aA	{!
+1	100011	2	cC	{!
+1	100012	2	aA	{!
+1	100013	2	gG	{!
+1	100014	2	a$A$	{!
diff --git a/test/mpileup/mp_overlap1.sam b/test/mpileup/mp_overlap1.sam
new file mode 100644
index 000000000..0e3d14bf3
--- /dev/null
+++ b/test/mpileup/mp_overlap1.sam
@@ -0,0 +1,4 @@
+@HD	VN:1.5	SO:coordinate
+@SQ	SN:1	LN:249250621
+r1	147	1	100003	50	12M	*	0	0	TAAGCACACAGA	ZZZZZZZZZZZZ
+r1	99	1	100003	10	2M5D5M	*	0	0	TAACAGA	BBBBBBB
diff --git a/test/mpileup/mp_overlap2.out b/test/mpileup/mp_overlap2.out
new file mode 100644
index 000000000..7e5af6dbc
--- /dev/null
+++ b/test/mpileup/mp_overlap2.out
@@ -0,0 +1,12 @@
+1	100003	2	^+T^St	{!
+1	100004	2	A-5()a	{!
+1	100005	2	*a	{!
+1	100006	2	*g	{!
+1	100007	2	*c	{!
+1	100008	2	*a	{!
+1	100009	2	*c	{!
+1	100010	2	Aa	{!
+1	100011	2	Cc	{!
+1	100012	2	Aa	{!
+1	100013	2	Gg	{!
+1	100014	2	A$a$	{!
diff --git a/test/mpileup/mp_overlap2.sam b/test/mpileup/mp_overlap2.sam
new file mode 100644
index 000000000..ba9b517c5
--- /dev/null
+++ b/test/mpileup/mp_overlap2.sam
@@ -0,0 +1,4 @@
+@HD	VN:1.5	SO:coordinate
+@SQ	SN:1	LN:249250621
+r1	99	1	100003	10	2M5D5M	*	0	0	TAACAGA	BBBBBBB
+r1	147	1	100003	50	12M	*	0	0	TAAGCACACAGA	ZZZZZZZZZZZZ
diff --git a/test/mpileup/mpileup.tst b/test/mpileup/mpileup.tst
index 4ffbd3481..534383ea0 100644
--- a/test/mpileup/mpileup.tst
+++ b/test/mpileup/mpileup.tst
@@ -71,3 +71,8 @@ P c1#pad3.out $pileup -m c1#pad3.sam
 
 # Issue #852.  Problem caused by alignments with entirely S/I ops in CIGAR.
 P small.out $pileup -m small.bam
+
+# Overlap removal and the effect on quality values
+P mp_overlap1.out $pileup -m mp_overlap1.sam
+P mp_overlap2.out $pileup -m mp_overlap2.sam
+
diff --git a/test/mpileup/small.out b/test/mpileup/small.out
index b5c161024..13f943a53 100644
--- a/test/mpileup/small.out
+++ b/test/mpileup/small.out
@@ -1,322 +1,322 @@
-2	1	1	^]T
-2	2	1	G
-2	3	1	G
-2	4	1	A
-2	5	1	G
-2	6	1	A
-2	7	1	G
-2	8	1	C
-2	9	1	A
-2	10	1	C
-2	11	1	A
-2	12	1	T
-2	13	1	A
-2	14	1	A
-2	15	1	C
-2	16	1	T
-2	17	1	T
-2	18	1	G
-2	19	1	G
-2	20	1	G
-2	21	1	T
-2	22	1	G
-2	23	1	A
-2	24	1	G
-2	25	1	A
-2	26	1	T
-2	27	1	G
-2	28	1	A
-2	29	1	T
-2	30	1	G
-2	31	2	A^]A
-2	32	2	AA
-2	33	2	AA
-2	34	2	TT
-2	35	2	GG
-2	36	2	AA
-2	37	2	GG
-2	38	2	CC
-2	39	2	AA
-2	40	2	CC
-2	41	2	TT
-2	42	2	GG
-2	43	2	GG
-2	44	2	CC
-2	45	2	TT
-2	46	2	TT
-2	47	2	TT
-2	48	2	GG
-2	49	2	GG
-2	50	2	AA
-2	51	2	GG
-2	52	2	TT
-2	53	2	CC
-2	54	2	AA
-2	55	2	CC
-2	56	2	AA
-2	57	2	CC
-2	58	2	AA
-2	59	2	GG
-2	60	2	AA
-2	61	2	CC
-2	62	2	CC
-2	63	2	AA
-2	64	3	GG^]g
-2	65	3	GGg
-2	66	3	GGg
-2	67	4	TTt^]t
-2	68	4	CCcc
-2	69	4	CCcc
-2	70	4	AAaa
-2	71	4	GGgg
-2	72	4	GGgg
-2	73	4	CCcc
-2	74	4	GGgg
-2	75	4	C$Ccc
-2	76	3	Ccc
-2	77	3	Ttt
-2	78	3	Agg
-2	79	3	Ttt
-2	80	3	Aat
-2	81	3	Ccc
-2	82	3	Ccc
-2	83	3	Aaa
-2	84	3	Ttt
-2	85	3	Aaa
-2	86	3	Aaa
-2	87	3	Ccc
-2	88	3	Acc
-2	89	3	Ccc
-2	90	3	Ttt
-2	91	3	Ccc
-2	92	3	Tgt
-2	93	3	Aaa
-2	94	3	Ggg
-2	95	3	Ttt
-2	96	3	Ggg
-2	97	3	Ggg
-2	98	3	Ttt
-2	99	3	Ggg
-2	100	3	Ttt
-2	101	3	Ggg
-2	102	3	Ggg
-2	103	3	Ccc
-2	104	3	Ggg
-2	105	3	G$gg
-2	106	2	aa
-2	107	2	aa
-2	108	2	cc
-2	109	2	cc
-2	110	2	tt
-2	111	2	cc
-2	112	2	tt
-2	113	2	cc
-2	114	2	aa
-2	115	2	gg
-2	116	2	aa
-2	117	2	cc
-2	118	2	cc
-2	119	2	tt
-2	120	2	cc
-2	121	2	cc
-2	122	2	cc
-2	123	2	aa
-2	124	2	gg
-2	125	2	cc
-2	126	2	cc
-2	127	2	aa
-2	128	2	gg
-2	129	2	aa
-2	130	2	aa
-2	131	2	aa
-2	132	2	gg
-2	133	2	gg
-2	134	2	ag
-2	135	2	aa
-2	136	2	tt
-2	137	2	cc
-2	138	2	t$t$
-2	495	1	^Ft
-2	496	1	t
-2	497	1	t
-2	498	1	g
-2	499	1	g
-2	500	1	c
-2	501	1	a
-2	502	1	a
-2	503	1	t
-2	504	1	t
-2	505	1	t
-2	506	1	a
-2	507	1	c
-2	508	1	a
-2	509	1	c
-2	510	1	t
-2	511	1	g
-2	512	1	t
-2	513	1	g
-2	514	1	t
-2	515	1	t
-2	516	1	a
-2	517	1	t
-2	518	1	a
-2	519	1	g
-2	520	1	c
-2	521	1	a
-2	522	1	a
-2	523	1	t
-2	524	1	a
-2	525	1	t
-2	526	1	a
-2	527	1	g
-2	528	1	t
-2	529	1	g
-2	530	1	a
-2	531	1	a
-2	532	1	a
-2	533	1	a
-2	534	1	g
-2	535	1	g
-2	536	1	g
-2	537	1	t
-2	538	1	g
-2	539	1	a
-2	540	1	t
-2	541	1	c
-2	542	1	a
-2	543	1	t
-2	544	1	t
-2	545	1	a
-2	546	1	c
-2	547	1	c
-2	548	1	t
-2	549	1	c
-2	550	1	a
-2	551	1	a
-2	552	1	g
-2	553	1	a
-2	554	1	c
-2	555	1	t
-2	556	1	g
-2	557	1	t
-2	558	1	t
-2	559	1	c
-2	560	1	a
-2	561	1	c
-2	562	1	a
-2	563	1	a
-2	564	1	a
-2	565	1	c
-2	566	1	a
-2	567	1	c
-2	568	1	a
-2	569	1	t$
-2	648	1	^gA
-2	649	1	C
-2	650	1	G
-2	651	1	C
-2	652	1	A
-2	653	1	C
-2	654	1	C
-2	655	1	C
-2	656	1	T
-2	657	1	C
-2	658	1	T
-2	659	1	A
-2	660	1	T
-2	661	1	C
-2	662	1	C
-2	663	1	C
-2	664	1	C
-2	665	1	A
-2	666	1	C
-2	667	1	A
-2	668	1	T
-2	669	1	A
-2	670	1	A
-2	671	1	A
-2	672	1	T
-2	673	1	C
-2	674	1	T
-2	675	1	A
-2	676	1	T
-2	677	1	A
-2	678	1	C
-2	679	1	A
-2	680	1	A
-2	681	1	C
-2	682	2	A^>a
-2	683	2	Cc
-2	684	2	Tt
-2	685	2	Cc
-2	686	2	Aa
-2	687	2	Cc
-2	688	2	Cc
-2	689	2	Cc
-2	690	2	Tt
-2	691	2	Cc
-2	692	2	Tt
-2	693	2	Aa
-2	694	2	Cc
-2	695	2	Aa
-2	696	2	Cc
-2	697	2	Cc
-2	698	2	Cc
-2	699	2	Aa
-2	700	2	Cc
-2	701	2	Aa
-2	702	2	Tt
-2	703	2	Aa
-2	704	2	Cc
-2	705	2	Aa
-2	706	2	Tt
-2	707	2	Cc
-2	708	2	Tt
-2	709	2	Aa
-2	710	2	Tt
-2	711	2	Aa
-2	712	2	Cc
-2	713	2	Aa
-2	714	2	Aa
-2	715	2	Cc
-2	716	2	Aa
-2	717	2	Cc
-2	718	2	Gg
-2	719	2	C$c
-2	720	1	a
-2	721	1	c
-2	722	1	c
-2	723	1	c
-2	724	1	t
-2	725	1	c
-2	726	1	t
-2	727	1	a
-2	728	1	c
-2	729	1	c
-2	730	1	c
-2	731	1	c
-2	732	1	a
-2	733	1	c
-2	734	1	a
-2	735	1	t
-2	736	1	a
-2	737	1	c
-2	738	1	g
-2	739	1	t
-2	740	1	c
-2	741	1	t
-2	742	1	a
-2	743	1	c
-2	744	1	a
-2	745	1	c
-2	746	1	a
-2	747	1	a
-2	748	1	c
-2	749	1	a
-2	750	1	t
-2	751	1	g
-2	752	1	c
-2	753	1	a
-2	754	1	c
-2	755	1	g
-2	756	1	c$
+2	1	1	^]T	A
+2	2	1	G	E
+2	3	1	G	D
+2	4	1	A	@
+2	5	1	G	?
+2	6	1	A	?
+2	7	1	G	E
+2	8	1	C	C
+2	9	1	A	B
+2	10	1	C	J
+2	11	1	A	B
+2	12	1	T	C
+2	13	1	A	A
+2	14	1	A	D
+2	15	1	C	J
+2	16	1	T	A
+2	17	1	T	A
+2	18	1	G	J
+2	19	1	G	I
+2	20	1	G	D
+2	21	1	T	@
+2	22	1	G	I
+2	23	1	A	<
+2	24	1	G	J
+2	25	1	A	A
+2	26	1	T	<
+2	27	1	G	I
+2	28	1	A	?
+2	29	1	T	@
+2	30	1	G	K
+2	31	2	A^]A	BA
+2	32	2	AA	E<
+2	33	2	AA	E@
+2	34	2	TT	A@
+2	35	2	GG	KF
+2	36	2	AA	C;
+2	37	2	GG	K?
+2	38	2	CC	HF
+2	39	2	AA	DA
+2	40	2	CC	J=
+2	41	2	TT	FE
+2	42	2	GG	II
+2	43	2	GG	K;
+2	44	2	CC	I=
+2	45	2	TT	GE
+2	46	2	TT	CA
+2	47	2	TT	AB
+2	48	2	GG	1H
+2	49	2	GG	:9
+2	50	2	AA	=9
+2	51	2	GG	FJ
+2	52	2	TT	AA
+2	53	2	CC	HH
+2	54	2	AA	CC
+2	55	2	CC	HG
+2	56	2	AA	C@
+2	57	2	CC	HE
+2	58	2	AA	D>
+2	59	2	GG	AI
+2	60	2	AA	A@
+2	61	2	CC	IG
+2	62	2	CC	7(
+2	63	2	AA	<C
+2	64	3	GG^]g	!;a
+2	65	3	GGg	!&k
+2	66	3	GGg	!&^
+2	67	4	TTt^]t	!!`:
+2	68	4	CCcc	!!f[
+2	69	4	CCcc	!!mU
+2	70	4	AAaa	!!ba
+2	71	4	GGgg	!!UZ
+2	72	4	GGgg	!!RC
+2	73	4	CCcc	!!YF
+2	74	4	GGgg	!!`L
+2	75	4	C$Ccc	!!fZ
+2	76	3	Ccc	!IQ
+2	77	3	Ttt	!B^
+2	78	3	Agg	!D1
+2	79	3	Ttt	!;>
+2	80	3	Aat	!3)
+2	81	3	Ccc	!HU
+2	82	3	Ccc	!JP
+2	83	3	Aaa	!@Z
+2	84	3	Ttt	!3S
+2	85	3	Aaa	!@T
+2	86	3	Aaa	!=S
+2	87	3	Ccc	!HI
+2	88	3	Acc	!F9
+2	89	3	Ccc	!EY
+2	90	3	Ttt	!<]
+2	91	3	Ccc	!5N
+2	92	3	Tgt	!-N
+2	93	3	Aaa	!C_
+2	94	3	Ggg	!CT
+2	95	3	Ttt	!<R
+2	96	3	Ggg	!Ic
+2	97	3	Ggg	!G[
+2	98	3	Ttt	!<U
+2	99	3	Ggg	!GZ
+2	100	3	Ttt	!:_
+2	101	3	Ggg	!I\
+2	102	3	Ggg	!HJ
+2	103	3	Ccc	!DX
+2	104	3	Ggg	!IK
+2	105	3	G$gg	!KF
+2	106	2	aa	B3
+2	107	2	aa	A2
+2	108	2	cc	D&
+2	109	2	cc	D>
+2	110	2	tt	B>
+2	111	2	cc	JD
+2	112	2	tt	B@
+2	113	2	cc	K;
+2	114	2	aa	?C
+2	115	2	gg	B=
+2	116	2	aa	?8
+2	117	2	cc	J&
+2	118	2	cc	AF
+2	119	2	tt	B@
+2	120	2	cc	J:
+2	121	2	cc	J>
+2	122	2	cc	IF
+2	123	2	aa	DC
+2	124	2	gg	H@
+2	125	2	cc	I7
+2	126	2	cc	IG
+2	127	2	aa	C7
+2	128	2	gg	BB
+2	129	2	aa	>7
+2	130	2	aa	<>
+2	131	2	aa	C>
+2	132	2	gg	F6
+2	133	2	gg	F,
+2	134	2	ag	>0
+2	135	2	aa	??
+2	136	2	tt	??
+2	137	2	cc	EB
+2	138	2	t$t$	?>
+2	495	1	^Ft	E
+2	496	1	t	E
+2	497	1	t	D
+2	498	1	g	J
+2	499	1	g	L
+2	500	1	c	N
+2	501	1	a	D
+2	502	1	a	D
+2	503	1	t	F
+2	504	1	t	F
+2	505	1	t	C
+2	506	1	a	B
+2	507	1	c	N
+2	508	1	a	B
+2	509	1	c	N
+2	510	1	t	D
+2	511	1	g	L
+2	512	1	t	D
+2	513	1	g	L
+2	514	1	t	F
+2	515	1	t	B
+2	516	1	a	D
+2	517	1	t	C
+2	518	1	a	G
+2	519	1	g	J
+2	520	1	c	M
+2	521	1	a	C
+2	522	1	a	C
+2	523	1	t	B
+2	524	1	a	C
+2	525	1	t	B
+2	526	1	a	H
+2	527	1	g	L
+2	528	1	t	D
+2	529	1	g	M
+2	530	1	a	D
+2	531	1	a	D
+2	532	1	a	D
+2	533	1	a	H
+2	534	1	g	J
+2	535	1	g	J
+2	536	1	g	L
+2	537	1	t	D
+2	538	1	g	M
+2	539	1	a	C
+2	540	1	t	C
+2	541	1	c	M
+2	542	1	a	C
+2	543	1	t	D
+2	544	1	t	A
+2	545	1	a	@
+2	546	1	c	L
+2	547	1	c	M
+2	548	1	t	C
+2	549	1	c	M
+2	550	1	a	C
+2	551	1	a	G
+2	552	1	g	K
+2	553	1	a	@
+2	554	1	c	M
+2	555	1	t	C
+2	556	1	g	K
+2	557	1	t	E
+2	558	1	t	B
+2	559	1	c	K
+2	560	1	a	@
+2	561	1	c	K
+2	562	1	a	A
+2	563	1	a	A
+2	564	1	a	@
+2	565	1	c	I
+2	566	1	a	?
+2	567	1	c	I
+2	568	1	a	>
+2	569	1	t$	@
+2	648	1	^gA	?
+2	649	1	C	F
+2	650	1	G	0
+2	651	1	C	D
+2	652	1	A	<
+2	653	1	C	<
+2	654	1	C	G
+2	655	1	C	>
+2	656	1	T	D
+2	657	1	C	H
+2	658	1	T	@
+2	659	1	A	A
+2	660	1	T	>
+2	661	1	C	E
+2	662	1	C	:
+2	663	1	C	=
+2	664	1	C	H
+2	665	1	A	A
+2	666	1	C	F
+2	667	1	A	C
+2	668	1	T	<
+2	669	1	A	?
+2	670	1	A	@
+2	671	1	A	?
+2	672	1	T	/
+2	673	1	C	:
+2	674	1	T	@
+2	675	1	A	+
+2	676	1	T	>
+2	677	1	A	?
+2	678	1	C	D
+2	679	1	A	B
+2	680	1	A	@
+2	681	1	C	5
+2	682	2	A^>a	<?
+2	683	2	Cc	CE
+2	684	2	Tt	DA
+2	685	2	Cc	HF
+2	686	2	Aa	B>
+2	687	2	Cc	GH
+2	688	2	Cc	2H
+2	689	2	Cc	:F
+2	690	2	Tt	E?
+2	691	2	Cc	GG
+2	692	2	Tt	C@
+2	693	2	Aa	+?
+2	694	2	Cc	=G
+2	695	2	Aa	B@
+2	696	2	Cc	FH
+2	697	2	Cc	<K
+2	698	2	Cc	BJ
+2	699	2	Aa	?@
+2	700	2	Cc	>J
+2	701	2	Aa	AA
+2	702	2	Tt	??
+2	703	2	Aa	3A
+2	704	2	Cc	CM
+2	705	2	Aa	CC
+2	706	2	Tt	BB
+2	707	2	Cc	HL
+2	708	2	Tt	EB
+2	709	2	Aa	=C
+2	710	2	Tt	??
+2	711	2	Aa	@A
+2	712	2	Cc	CK
+2	713	2	Aa	=C
+2	714	2	Aa	;A
+2	715	2	Cc	>K
+2	716	2	Aa	@@
+2	717	2	Cc	5B
+2	718	2	Gg	8I
+2	719	2	C$c	(G
+2	720	1	a	=
+2	721	1	c	J
+2	722	1	c	K
+2	723	1	c	J
+2	724	1	t	A
+2	725	1	c	H
+2	726	1	t	>
+2	727	1	a	?
+2	728	1	c	J
+2	729	1	c	L
+2	730	1	c	J
+2	731	1	c	L
+2	732	1	a	@
+2	733	1	c	L
+2	734	1	a	B
+2	735	1	t	A
+2	736	1	a	?
+2	737	1	c	C
+2	738	1	g	J
+2	739	1	t	B
+2	740	1	c	I
+2	741	1	t	@
+2	742	1	a	@
+2	743	1	c	J
+2	744	1	a	@
+2	745	1	c	I
+2	746	1	a	B
+2	747	1	a	@
+2	748	1	c	K
+2	749	1	a	@
+2	750	1	t	B
+2	751	1	g	H
+2	752	1	c	H
+2	753	1	a	>
+2	754	1	c	B
+2	755	1	g	F
+2	756	1	c$	?
diff --git a/test/noroundtrip-out.vcf b/test/noroundtrip-out.vcf
index 582406459..21fa1602a 100644
--- a/test/noroundtrip-out.vcf
+++ b/test/noroundtrip-out.vcf
@@ -2,8 +2,10 @@
 ##FILTER=<ID=PASS,Description="All filters passed">
 ##contig=<ID=3>
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=S,Number=1,Type=String,Description="Non-GT string">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA1
-3	50	.	A	T	0	PASS	.	GT:GT	2,4:.
+3	50	.	A	T	0	PASS	.	GT	0/1
 3	60	.	T	C	0	PASS	.	GT	0/1
-3	70	.	G	A	0	PASS	.	GT:GT	2,4:.
-3	80	.	C	G	0	PASS	.	GT:GT	2,4:0/1
+3	70	.	G	A	0	PASS	.	GT	0/1
+3	80	.	C	G	0	PASS	.	GT	0/1
+3	90	.	A	G	0	PASS	.	GT:S	0/1:.
diff --git a/test/noroundtrip.vcf b/test/noroundtrip.vcf
index c4610415d..61206bf0c 100644
--- a/test/noroundtrip.vcf
+++ b/test/noroundtrip.vcf
@@ -1,8 +1,10 @@
 ##fileformat=VCFv4.3
 ##contig=<ID=3>
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=S,Number=1,Type=String,Description="Non-GT string">
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NA1
 3	50	.	A	T	0	PASS	.	GT:GT	0/1
 3	60	.	T	C	0	PASS	.	GT	0/1
 3	70	.	G	A	0	PASS	.	GT:GT	0/1:.
 3	80	.	C	G	0	PASS	.	GT:GT	0/1:0/1
+3	90	.	A	G	0	PASS	.	GT:S:S	0/1
diff --git a/test/pileup.c b/test/pileup.c
index e36f34d23..757b2aeb0 100644
--- a/test/pileup.c
+++ b/test/pileup.c
@@ -1,6 +1,6 @@
 /*  test/pileup.c -- simple pileup tester
 
-    Copyright (C) 2014,2018-2019 Genome Research Ltd.
+    Copyright (C) 2014,2018-2019, 2024 Genome Research Ltd.
 
     Author: James Bonfield <jkb@sanger.ac.uk>
 
@@ -119,6 +119,19 @@ static int print_pileup_seq(const bam_pileup1_t *p, int n) {
     return -1;
 }
 
+static void print_pileup_qual(const bam_pileup1_t *p, int n) {
+    int i;
+
+    for (i = 0; i < n; i++, p++) {
+        uint8_t *qual = bam_get_qual(p->b);
+        uint8_t q = '~';
+        if (p->qpos < p->b->core.l_qseq &&
+            qual[p->qpos]+33 < '~')
+            q = qual[p->qpos]+33;
+        putchar(q);
+    }
+}
+
 static int test_pileup(ptest_t *input) {
     bam_plp_t plp = NULL;
     const bam_pileup1_t *p;
@@ -143,6 +156,9 @@ static int test_pileup(ptest_t *input) {
         if (print_pileup_seq(p, n) < 0)
             goto fail;
 
+        putchar('\t');
+        print_pileup_qual(p, n);
+
         putchar('\n');
     }
     if (n < 0) {
@@ -188,6 +204,9 @@ static int test_mpileup(ptest_t *input) {
         if (print_pileup_seq(pileups[0], n_plp[0]) < 0)
             goto fail;
 
+        putchar('\t');
+        print_pileup_qual(pileups[0], n_plp[0]);
+
         putchar('\n');
     }
     if (n < 0) {
diff --git a/test/pileup_mod.c b/test/pileup_mod.c
index 323c0c6c2..d725a09a4 100644
--- a/test/pileup_mod.c
+++ b/test/pileup_mod.c
@@ -73,8 +73,10 @@ void process_pileup(sam_hdr_t *h, const bam_pileup1_t *p,
 // as each new read is added or removed from the pileups.
 int pileup_cd_create(void *data, const bam1_t *b, bam_pileup_cd *cd) {
     hts_base_mod_state *m = hts_base_mod_state_alloc();
-    if (bam_parse_basemod(b, m) < 0)
+    if (bam_parse_basemod(b, m) < 0) {
+        hts_base_mod_state_free(m);
         return -1;
+    }
     cd->p = m;
     return 0;
 }
diff --git a/test/sam.c b/test/sam.c
index 00edaff6f..f0eadbefe 100644
--- a/test/sam.c
+++ b/test/sam.c
@@ -1,6 +1,6 @@
 /*  test/sam.c -- SAM/BAM/CRAM API test cases.
 
-    Copyright (C) 2014-2020, 2022 Genome Research Ltd.
+    Copyright (C) 2014-2020, 2022-2023 Genome Research Ltd.
 
     Author: John Marshall <jm18@sanger.ac.uk>
 
diff --git a/test/tabix/tabix.tst b/test/tabix/tabix.tst
index f5fd71a08..316c26fdb 100644
--- a/test/tabix/tabix.tst
+++ b/test/tabix/tabix.tst
@@ -1,4 +1,4 @@
-#    Copyright (C) 2017 Genome Research Ltd.
+#    Copyright (C) 2017, 2024 Genome Research Ltd.
 #
 #    Author: Robert Davies <rmd@sanger.ac.uk>
 #
@@ -66,3 +66,7 @@ P gff_file.X.2934832.2935190.out $tabix gff_file.tbi.tmp.gff.gz X:2934832-293519
 
 # tabix with --separate-regions
 P bed_file.separate.out $tabix --separate-regions bed_file.tbi.tmp.bed.gz X:1100-1400 Y:100000-100550 Z:100000-100005
+
+# Using threads with tabix
+P . $tabix -f -p bed bed_file.tbi.tmp.bed.gz -@ 2
+P vcf_file.1.3000151.out $tabix vcf_file.tbi.tmp.vcf.gz 1:3000151-3000151 --threads 2
diff --git a/test/test.pl b/test/test.pl
index 4934c15d0..03eca1129 100755
--- a/test/test.pl
+++ b/test/test.pl
@@ -1,6 +1,6 @@
 #!/usr/bin/env perl
 #
-#    Copyright (C) 2012-2023 Genome Research Ltd.
+#    Copyright (C) 2012-2024 Genome Research Ltd.
 #
 #    Author: Petr Danecek <pd3@sanger.ac.uk>
 #
@@ -589,6 +589,36 @@ sub test_bgzip {
         return;
     }
     passed($opts,$test);
+
+    # try writing to an explicit file name, round trip test
+    $test = sprintf('%s %2s threads', 'bgzip --output',
+                    $threads ? $threads : 'no');
+    print "$test: ";
+
+    my $compressed_op = "$$opts{tmp}/arbitrary.$threads.gz";
+    my $uncompressed_op = "$$opts{tmp}/arbitrary.$threads.txt";
+
+    $c = "$$opts{bin}/bgzip $at '$data' -o '$compressed_op'";
+
+    ($ret, $out) = _cmd($c);
+    if ($ret) {
+        failed($opts, $test, "non-zero exit from $c");
+        return;
+    }
+    $c = "$$opts{bin}/bgzip $at -d $compressed_op --output '$uncompressed_op'";
+
+    ($ret, $out) = _cmd($c);
+    if ($ret) {
+        failed($opts, $test, "non-zero exit from $c");
+        return;
+    }
+    $c = "cmp '$data' '$uncompressed_op'";
+    ($ret, $out) = _cmd($c);
+    if ($ret) {
+        failed($opts, $test, $out ? $out : "'$data' '$uncompressed_op' differ");
+        return;
+    }
+    passed($opts,$test);
 }
 
 my $test_view_failures;
diff --git a/test/test_mod.c b/test/test_mod.c
index d8a53f3de..ebe9b2aaf 100644
--- a/test/test_mod.c
+++ b/test/test_mod.c
@@ -213,17 +213,18 @@ int main(int argc, char **argv) {
         puts("\n===\n");
     }
     fflush(stdout);
+    int ret = 0;
     if (sam_close(in) != 0 || r < -1)
-        goto err;
+        ret = 1;
 
     bam_destroy1(b);
     sam_hdr_destroy(h);
     hts_base_mod_state_free(m);
-    return 0;
+    return ret;
 
  err:
     bam_destroy1(b);
     sam_hdr_destroy(h);
     hts_base_mod_state_free(m);
-    return 1;
+    return sam_close(in) != 0 ? 1 : 2;
 }
diff --git a/textutils_internal.h b/textutils_internal.h
index 1ad096494..faa1d4d11 100644
--- a/textutils_internal.h
+++ b/textutils_internal.h
@@ -1,6 +1,6 @@
 /* textutils_internal.h -- non-bioinformatics utility routines for text etc.
 
-   Copyright (C) 2016,2018-2020 Genome Research Ltd.
+   Copyright (C) 2016,2018-2020, 2024 Genome Research Ltd.
 
    Author: John Marshall <jm18@sanger.ac.uk>
 
@@ -221,27 +221,43 @@ static inline int64_t hts_str2int(const char *in, char **end, int bits,
     uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
     const unsigned char *v = (const unsigned char *) in;
     const unsigned int ascii_zero = '0'; // Prevents conversion to signed
-    unsigned char d;
-    int neg = 1;
+    unsigned int d;
 
+    int neg;
     switch(*v) {
     case '-':
-        neg=-1;
-        limit++; /* fall through */
-    case '+':
+        limit++;
+        neg=1;
         v++;
+        // See "dup" comment below
+        while (--fast && *v>='0' && *v<='9')
+            n = n*10 + *v++ - ascii_zero;
         break;
+
+    case '+':
+        v++;
+        // fall through
+
     default:
+        neg = 0;
+        // dup of above.  This is somewhat unstable and mainly for code
+        // size cheats to prevent instruction cache lines spanning 32-byte
+        // blocks in the sam_parse_B_vals calling code.  It's been tested
+        // on gcc7, gcc13, clang10 and clang16 with -O2 and -O3.  While
+        // not exhaustive, this code duplication gives stable fast results
+        // while a single copy does not.
+        // (NB: system was "seq4d", so quite old)
+        while (--fast && *v>='0' && *v<='9')
+            n = n*10 + *v++ - ascii_zero;
         break;
     }
 
-    while (--fast && *v>='0' && *v<='9')
-        n = n*10 + *v++ - ascii_zero;
-
-    if (!fast) {
+    // NB gcc7 is slow with (unsigned)(*v - ascii_zero) < 10,
+    // while gcc13 prefers it.
+    if (*v>='0' && !fast) { // rejects ',' and tab
         uint64_t limit_d_10 = limit / 10;
         uint64_t limit_m_10 = limit - 10 * limit_d_10;
-         while ((d = *v - ascii_zero) < 10) {
+        while ((d = *v - ascii_zero) < 10) {
             if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
                 n = n*10 + d;
                 v++;
@@ -256,7 +272,7 @@ static inline int64_t hts_str2int(const char *in, char **end, int bits,
 
     *end = (char *)v;
 
-    return (n && neg < 0) ? -((int64_t) (n - 1)) - 1 : (int64_t) n;
+    return neg ? (int64_t)-n : (int64_t)n;
 }
 
 /// Convert a string to an unsigned integer, with overflow detection
@@ -279,12 +295,12 @@ Both end and failed must be non-NULL.
  */
 
 static inline uint64_t hts_str2uint(const char *in, char **end, int bits,
-                                      int *failed) {
+                                    int *failed) {
     uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
     const unsigned char *v = (const unsigned char *) in;
     const unsigned int ascii_zero = '0'; // Prevents conversion to signed
     uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
-    unsigned char d;
+    unsigned int d;
 
     if (*v == '+')
         v++;
@@ -292,7 +308,7 @@ static inline uint64_t hts_str2uint(const char *in, char **end, int bits,
     while (--fast && *v>='0' && *v<='9')
         n = n*10 + *v++ - ascii_zero;
 
-    if (!fast) {
+    if ((unsigned)(*v - ascii_zero) < 10 && !fast) {
         uint64_t limit_d_10 = limit / 10;
         uint64_t limit_m_10 = limit - 10 * limit_d_10;
         while ((d = *v - ascii_zero) < 10) {
diff --git a/vcf.c b/vcf.c
index abf90f9cc..9dec8481b 100644
--- a/vcf.c
+++ b/vcf.c
@@ -1,7 +1,7 @@
 /*  vcf.c -- VCF/BCF API functions.
 
     Copyright (C) 2012, 2013 Broad Institute.
-    Copyright (C) 2012-2023 Genome Research Ltd.
+    Copyright (C) 2012-2024 Genome Research Ltd.
     Portions copyright (C) 2014 Intel Corporation.
 
     Author: Heng Li <lh3@sanger.ac.uk>
@@ -1557,6 +1557,7 @@ int bcf_hdr_write(htsFile *hfp, bcf_hdr_t *h)
     u32_to_le(htxt.l, hlen);
     if ( bgzf_write(fp, hlen, 4) !=4 ) return -1;
     if ( bgzf_write(fp, htxt.s, htxt.l) != htxt.l ) return -1;
+    if ( bgzf_flush(fp) < 0) return -1;
 
     free(htxt.s);
     return 0;
@@ -3028,6 +3029,26 @@ static int vcf_parse_format_alloc4(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
             fmt[j].buf = (uint8_t*)mem->s + fmt[j].offset;
     }
 
+    // check for duplicate tags
+    int i;
+    for (i=1; i<v->n_fmt; i++)
+    {
+        fmt_aux_t *ifmt = &fmt[i];
+        if ( ifmt->size==-1 ) continue; // already marked for removal
+        for (j=0; j<i; j++)
+        {
+            fmt_aux_t *jfmt = &fmt[j];
+            if ( jfmt->size==-1 ) continue; // already marked for removal
+            if ( ifmt->key!=jfmt->key ) continue;
+            static int warned = 0;
+            if ( !warned ) hts_log_warning("Duplicate FORMAT tag %s at %s:%"PRIhts_pos, bcf_hdr_int2id(h,BCF_DT_ID,ifmt->key), bcf_seqname_safe(h,v), v->pos+1);
+            warned = 1;
+            v->errcode |= BCF_ERR_TAG_INVALID;
+            ifmt->size = -1;
+            ifmt->offset = 0;
+            break;
+        }
+    }
     return 0;
 }
 
@@ -3071,7 +3092,7 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
 
             if ( z->size==-1 )
             {
-                // this field is to be ignored, it's too big
+                // this field is to be ignored, it's either too big or a duplicate
                 while ( *t != ':' && *t ) t++;
             }
             else if (htype == BCF_HT_STR) {
@@ -3213,6 +3234,10 @@ static int vcf_parse_format_fill5(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
             fmt_aux_t *z = &fmt[j];
             const int htype = z->y>>4&0xf;
             int l;
+
+            if (z->size == -1) // this field is to be ignored
+                continue;
+
             if (htype == BCF_HT_STR) {
                 if (z->is_gt) {
                     int32_t *x = (int32_t*)(z->buf + z->size * (size_t)m);
@@ -3273,18 +3298,17 @@ static int vcf_parse_format_gt6(kstring_t *s, const bcf_hdr_t *h, bcf1_t *v,
 
     }
     if ( need_downsize ) {
-        i = 1;
+        i = 0;
         while ( i < v->n_fmt ) {
             if ( fmt[i].size==-1 )
             {
-                memmove(&fmt[i-1],&fmt[i],sizeof(*fmt));
                 v->n_fmt--;
+                if ( i < v->n_fmt ) memmove(&fmt[i],&fmt[i+1],sizeof(*fmt)*(v->n_fmt-i));
             }
             else
                 i++;
         }
     }
-
     return 0;
 }
 
diff --git a/version.sh b/version.sh
index 39fb49e45..98ae48ec0 100755
--- a/version.sh
+++ b/version.sh
@@ -24,7 +24,7 @@
 # DEALINGS IN THE SOFTWARE.
 
 # Master version, for use in tarballs or non-git source copies
-VERSION=1.19.1
+VERSION=1.20
 
 # If we have a git clone, then check against the current tag
 srcdir=${0%/version.sh}