Skip to content

Commit

Permalink
Fix #912
Browse files Browse the repository at this point in the history
  • Loading branch information
martin-steinegger committed Jan 6, 2025
1 parent 17cd5c0 commit b0e91c1
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 10 deletions.
36 changes: 33 additions & 3 deletions src/commons/DBReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,9 @@ template <typename T> bool DBReader<T>::open(int accessType){
}

compression = isCompressed(dbtype);
if(compression == COMPRESSED){
padded = (getExtendedDbtype(dbtype) & Parameters::DBTYPE_EXTENDED_GPU);

if(compression == COMPRESSED || padded){
compressedBufferSizes = new size_t[threads];
compressedBuffers = new char*[threads];
dstream = new ZSTD_DStream*[threads];
Expand Down Expand Up @@ -530,6 +532,29 @@ template <typename T> size_t DBReader<T>::bsearch(const Index * index, size_t N,
return std::upper_bound(index, index + N, val, Index::compareByIdOnly) - index;
}


/**
 * Decodes the stored entry `id` into residue letters.
 *
 * Reads getSeqLen(id) bytes from the uncompressed entry, maps each numeric
 * residue code to its one-letter symbol and writes the result into the
 * scratch buffer owned by thread `thrIdx`.
 *
 * @param id      internal index of the entry to decode
 * @param thrIdx  index of the calling thread; selects the scratch buffer
 * @return pointer to compressedBuffers[thrIdx], holding getSeqLen(id)
 *         decoded characters; overwritten by the next call with the same
 *         thrIdx
 *
 * NOTE(review): the output is not '\0'/'\n' terminated here — confirm that
 * every caller bounds its reads by getSeqLen(id).
 * NOTE(review): nothing here checks seqLen against
 * compressedBufferSizes[thrIdx]; presumably the buffer is sized elsewhere —
 * verify it can hold the longest entry.
 */
template <typename T> char* DBReader<T>::getUnpadded(size_t id, int thrIdx) {
    static const char CODE_TO_CHAR[21] = {
        'A', /*  0 */ 'C', /*  1 */ 'D', /*  2 */
        'E', /*  3 */ 'F', /*  4 */ 'G', /*  5 */
        'H', /*  6 */ 'I', /*  7 */ 'K', /*  8 */
        'L', /*  9 */ 'M', /* 10 */ 'N', /* 11 */
        'P', /* 12 */ 'Q', /* 13 */ 'R', /* 14 */
        'S', /* 15 */ 'T', /* 16 */ 'V', /* 17 */
        'W', /* 18 */ 'Y', /* 19 */ 'X'  /* 20 */
    };
    static const size_t ALPHABET_SIZE = sizeof(CODE_TO_CHAR) / sizeof(CODE_TO_CHAR[0]);
    // Index of 'X'; used as the fallback for codes outside the table.
    static const unsigned char UNKNOWN_CODE = 20;

    const char *data = getDataUncompressed(id);
    const size_t seqLen = getSeqLen(id);
    char *out = compressedBuffers[thrIdx];

    for (size_t i = 0; i < seqLen; i++) {
        const unsigned char code = static_cast<unsigned char>(data[i]);
        // Codes >= 32 carry an offset of 32 on top of the base code
        // (presumably a masking flag — TODO confirm); strip it first.
        unsigned char baseCode = (code >= 32) ? static_cast<unsigned char>(code - 32) : code;
        // Guard the table lookup: unexpected bytes (21..31, or >= 53 before
        // the offset is removed) would otherwise index past CODE_TO_CHAR.
        if (baseCode >= ALPHABET_SIZE) {
            baseCode = UNKNOWN_CODE;
        }
        out[i] = CODE_TO_CHAR[baseCode];
    }
    return out;
}

template <typename T> char* DBReader<T>::getDataCompressed(size_t id, int thrIdx) {
char *data = getDataUncompressed(id);

Expand Down Expand Up @@ -573,7 +598,9 @@ template <typename T> size_t DBReader<T>::getAminoAcidDBSize() {
/**
 * Returns the data of entry `id`, picking the accessor from the database flags.
 *
 * - compression == COMPRESSED: delegates to getDataCompressed(id, thrIdx)
 * - padded set:                delegates to getUnpadded(id, thrIdx)
 * - otherwise:                 delegates to getDataUncompressed(id)
 *
 * The compression check comes first, so a database with both flags set is
 * served by getDataCompressed.
 *
 * @param id      internal index of the requested entry
 * @param thrIdx  index of the calling thread, forwarded to the accessors
 *                that take one
 * @return pointer to the entry data as returned by the selected accessor
 */
template <typename T> char* DBReader<T>::getData(size_t id, int thrIdx){
    if (compression == COMPRESSED) {
        return getDataCompressed(id, thrIdx);
    }
    if (padded) {
        return getUnpadded(id, thrIdx);
    }
    return getDataUncompressed(id);
}
Expand Down Expand Up @@ -628,7 +655,9 @@ template <typename T> char* DBReader<T>::getDataByDBKey(T dbKey, int thrIdx) {
size_t id = getId(dbKey);
if(compression == COMPRESSED ){
return (id != UINT_MAX) ? getDataCompressed(id, thrIdx) : NULL;
}else{
} if(padded) {
return (id != UINT_MAX) ? getUnpadded(id, thrIdx) : NULL;
} else{
return (id != UINT_MAX) ? getDataByOffset(index[id].offset) : NULL;
}
}
Expand Down Expand Up @@ -1016,6 +1045,7 @@ int DBReader<T>::isCompressed(int dbtype) {
return (dbtype & (1 << 31)) ? COMPRESSED : UNCOMPRESSED;
}


template<typename T>
void DBReader<T>::setSequentialAdvice() {
#ifdef HAVE_POSIX_MADVISE
Expand Down
3 changes: 3 additions & 0 deletions src/commons/DBReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ class DBReader : public MemoryTracker {

char* getDataCompressed(size_t id, int thrIdx);

char* getUnpadded(size_t id, int thrIdx);

char* getDataUncompressed(size_t id);

void touchData(size_t id);
Expand Down Expand Up @@ -479,6 +481,7 @@ class DBReader : public MemoryTracker {
// stores the dbtype (if dbtype file exists)
int dbtype;
int compression;
int padded;
char ** compressedBuffers;
size_t * compressedBufferSizes;
ZSTD_DStream ** dstream;
Expand Down
7 changes: 0 additions & 7 deletions src/util/convertalignments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,6 @@ int convertalignments(int argc, const char **argv, const Command &command) {
std::string queryProfData;
queryProfData.reserve(1024);

std::string queryBuffer;
queryBuffer.reserve(1024);

std::string queryHeaderBuffer;
queryHeaderBuffer.reserve(1024);

Expand All @@ -366,10 +363,6 @@ int convertalignments(int argc, const char **argv, const Command &command) {
size_t qId = qDbr.sequenceReader->getId(queryKey);
querySeqData = qDbr.sequenceReader->getData(qId, thread_idx);
querySeqLen = qDbr.sequenceReader->getSeqLen(qId);
if(sameDB && qDbr.sequenceReader->isCompressed()){
queryBuffer.assign(querySeqData, querySeqLen);
querySeqData = (char*) queryBuffer.c_str();
}
if (queryProfile) {
size_t queryEntryLen = qDbr.sequenceReader->getEntryLen(qId);
Sequence::extractProfileConsensus(querySeqData, queryEntryLen, *subMat, queryProfData);
Expand Down

0 comments on commit b0e91c1

Please sign in to comment.