Skip to content

Commit b0e91c1

Browse files
Fix #912
1 parent 17cd5c0 commit b0e91c1

File tree

3 files changed

+36
-10
lines changed

3 files changed

+36
-10
lines changed

src/commons/DBReader.cpp

+33-3
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,9 @@ template <typename T> bool DBReader<T>::open(int accessType){
185185
}
186186

187187
compression = isCompressed(dbtype);
188-
if(compression == COMPRESSED){
188+
padded = (getExtendedDbtype(dbtype) & Parameters::DBTYPE_EXTENDED_GPU);
189+
190+
if(compression == COMPRESSED || padded){
189191
compressedBufferSizes = new size_t[threads];
190192
compressedBuffers = new char*[threads];
191193
dstream = new ZSTD_DStream*[threads];
@@ -530,6 +532,29 @@ template <typename T> size_t DBReader<T>::bsearch(const Index * index, size_t N,
530532
return std::upper_bound(index, index + N, val, Index::compareByIdOnly) - index;
531533
}
532534

535+
536+
/**
 * Translates the numerically encoded residues of entry `id` into amino-acid
 * letters and stores them in the scratch buffer selected by `thrIdx`.
 *
 * Each input byte is looked up in a 21-letter table (codes 0..20). Bytes with a
 * value of 32 or more are first reduced by 32 (presumably a masking/lower-case
 * flag on the residue code - confirm against the writer of this format).
 * Any code that still falls outside the table is emitted as 'X' instead of
 * reading past the end of the table.
 *
 * @param id      index of the entry to decode
 * @param thrIdx  selects which of the per-thread buffers receives the output
 * @return compressedBuffers[thrIdx], holding getSeqLen(id) decoded characters.
 *         The contents are replaced by the next call that writes to the same
 *         buffer, so callers must consume or copy them before then.
 */
template <typename T> char* DBReader<T>::getUnpadded(size_t id, int thrIdx) {
    static const char CODE_TO_CHAR[21] = {
        'A', /*  0 */ 'C', /*  1 */ 'D', /*  2 */
        'E', /*  3 */ 'F', /*  4 */ 'G', /*  5 */
        'H', /*  6 */ 'I', /*  7 */ 'K', /*  8 */
        'L', /*  9 */ 'M', /* 10 */ 'N', /* 11 */
        'P', /* 12 */ 'Q', /* 13 */ 'R', /* 14 */
        'S', /* 15 */ 'T', /* 16 */ 'V', /* 17 */
        'W', /* 18 */ 'Y', /* 19 */ 'X'  /* 20 */
    };
    // Last valid table index; also the fallback for unrecognised codes ('X').
    static const unsigned char UNKNOWN_CODE = 20;
    // Codes at or above this value carry an offset that is stripped before lookup.
    static const unsigned char CODE_OFFSET = 32;

    const char *encoded = getDataUncompressed(id);
    const size_t seqLen = getSeqLen(id);

    // NOTE(review): seqLen is not checked against compressedBufferSizes[thrIdx]
    // and no terminator is appended. Confirm that the buffer is always allocated
    // with at least seqLen bytes and that callers rely on getSeqLen() rather
    // than on a trailing '\n' / '\0'.
    char *decoded = compressedBuffers[thrIdx];

    for (size_t i = 0; i < seqLen; i++) {
        const unsigned char code = static_cast<unsigned char>(encoded[i]);
        unsigned char baseCode = (code >= CODE_OFFSET) ? static_cast<unsigned char>(code - CODE_OFFSET) : code;
        if (baseCode > UNKNOWN_CODE) {
            // Out-of-table value: map to 'X' rather than indexing out of bounds.
            baseCode = UNKNOWN_CODE;
        }
        decoded[i] = CODE_TO_CHAR[baseCode];
    }
    return decoded;
}
557+
533558
template <typename T> char* DBReader<T>::getDataCompressed(size_t id, int thrIdx) {
534559
char *data = getDataUncompressed(id);
535560

@@ -573,7 +598,9 @@ template <typename T> size_t DBReader<T>::getAminoAcidDBSize() {
573598
/**
 * Returns the data of entry `id`, choosing the accessor that matches how the
 * database is stored.
 *
 * @param id      index of the entry to fetch
 * @param thrIdx  per-thread buffer index, forwarded to the accessors that need one
 * @return result of getDataCompressed() when the database is compressed,
 *         of getUnpadded() when the padded flag is set, and of
 *         getDataUncompressed() otherwise
 */
template <typename T> char* DBReader<T>::getData(size_t id, int thrIdx) {
    // Compression takes precedence over the padded flag.
    if (compression == COMPRESSED) {
        return getDataCompressed(id, thrIdx);
    }
    if (padded) {
        return getUnpadded(id, thrIdx);
    }
    return getDataUncompressed(id);
}
@@ -628,7 +655,9 @@ template <typename T> char* DBReader<T>::getDataByDBKey(T dbKey, int thrIdx) {
628655
size_t id = getId(dbKey);
629656
if(compression == COMPRESSED ){
630657
return (id != UINT_MAX) ? getDataCompressed(id, thrIdx) : NULL;
631-
}else{
658+
} if(padded) {
659+
return (id != UINT_MAX) ? getUnpadded(id, thrIdx) : NULL;
660+
} else{
632661
return (id != UINT_MAX) ? getDataByOffset(index[id].offset) : NULL;
633662
}
634663
}
@@ -1016,6 +1045,7 @@ int DBReader<T>::isCompressed(int dbtype) {
10161045
return (dbtype & (1 << 31)) ? COMPRESSED : UNCOMPRESSED;
10171046
}
10181047

1048+
10191049
template<typename T>
10201050
void DBReader<T>::setSequentialAdvice() {
10211051
#ifdef HAVE_POSIX_MADVISE

src/commons/DBReader.h

+3
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ class DBReader : public MemoryTracker {
174174

175175
char* getDataCompressed(size_t id, int thrIdx);
176176

177+
char* getUnpadded(size_t id, int thrIdx);
178+
177179
char* getDataUncompressed(size_t id);
178180

179181
void touchData(size_t id);
@@ -479,6 +481,7 @@ class DBReader : public MemoryTracker {
479481
// stores the dbtype (if dbtype file exists)
480482
int dbtype;
481483
int compression;
484+
int padded;
482485
char ** compressedBuffers;
483486
size_t * compressedBufferSizes;
484487
ZSTD_DStream ** dstream;

src/util/convertalignments.cpp

-7
Original file line numberDiff line numberDiff line change
@@ -337,9 +337,6 @@ int convertalignments(int argc, const char **argv, const Command &command) {
337337
std::string queryProfData;
338338
queryProfData.reserve(1024);
339339

340-
std::string queryBuffer;
341-
queryBuffer.reserve(1024);
342-
343340
std::string queryHeaderBuffer;
344341
queryHeaderBuffer.reserve(1024);
345342

@@ -366,10 +363,6 @@ int convertalignments(int argc, const char **argv, const Command &command) {
366363
size_t qId = qDbr.sequenceReader->getId(queryKey);
367364
querySeqData = qDbr.sequenceReader->getData(qId, thread_idx);
368365
querySeqLen = qDbr.sequenceReader->getSeqLen(qId);
369-
if(sameDB && qDbr.sequenceReader->isCompressed()){
370-
queryBuffer.assign(querySeqData, querySeqLen);
371-
querySeqData = (char*) queryBuffer.c_str();
372-
}
373366
if (queryProfile) {
374367
size_t queryEntryLen = qDbr.sequenceReader->getEntryLen(qId);
375368
Sequence::extractProfileConsensus(querySeqData, queryEntryLen, *subMat, queryProfData);

0 commit comments

Comments
 (0)