@@ -52,8 +52,7 @@ class DbInfo {
52
52
};
53
53
54
54
55
- void IndexBuilder::fillDatabase (IndexTable *indexTable, SequenceLookup **maskedLookup,
56
- SequenceLookup **unmaskedLookup,BaseMatrix &subMat,
55
+ void IndexBuilder::fillDatabase (IndexTable *indexTable, SequenceLookup ** externalLookup, BaseMatrix &subMat,
57
56
ScoreMatrix & three, ScoreMatrix & two, Sequence *seq,
58
57
DBReader<unsigned int > *dbr, size_t dbFrom, size_t dbTo, int kmerThr,
59
58
bool mask, bool maskLowerCaseMode, float maskProb, int maskNrepeats, int targetSearchMode) {
@@ -65,27 +64,14 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
65
64
size_t dbSize = dbTo - dbFrom;
66
65
DbInfo* info = new DbInfo (dbFrom, dbTo, seq->getEffectiveKmerSize (), *dbr);
67
66
68
- SequenceLookup *sequenceLookup;
69
- if (unmaskedLookup != NULL && maskedLookup == NULL ) {
70
- *unmaskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
71
- sequenceLookup = *unmaskedLookup;
72
- } else if (unmaskedLookup == NULL && maskedLookup != NULL ) {
73
- *maskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
74
- sequenceLookup = *maskedLookup;
75
- } else if (unmaskedLookup != NULL && maskedLookup != NULL ) {
76
- *unmaskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
77
- *maskedLookup = new SequenceLookup (dbSize, info->aaDbSize );
78
- sequenceLookup = *maskedLookup;
79
- } else {
80
- Debug (Debug::ERROR) << " This should not happen\n " ;
81
- EXIT (EXIT_FAILURE);
82
- }
67
+ *externalLookup = new SequenceLookup (dbSize, info->aaDbSize );
68
+ SequenceLookup *sequenceLookup = *externalLookup;
83
69
84
70
85
71
// identical scores for memory reduction code
86
72
char *idScoreLookup = getScoreLookup (subMat);
87
73
Debug::Progress progress (dbTo-dbFrom);
88
-
74
+ bool needMasking = (mask == 1 || maskNrepeats > 0 || maskLowerCaseMode == 1 );
89
75
size_t maskedResidues = 0 ;
90
76
size_t totalKmerCount = 0 ;
91
77
#pragma omp parallel
@@ -96,16 +82,17 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
96
82
#endif
97
83
// need to prune low scoring k-mers through masking
98
84
Masker *masker = NULL ;
99
- if (maskedLookup != NULL ) {
85
+ if (needMasking ) {
100
86
masker = new Masker (subMat);
101
87
}
102
88
103
-
104
- Indexer idxer (static_cast <unsigned int >(indexTable->getAlphabetSize ()), seq->getKmerSize ());
89
+ unsigned int alphabetSize = (indexTable != NULL ) ? static_cast <unsigned int >(indexTable->getAlphabetSize ())
90
+ : static_cast <unsigned int >(subMat.alphabetSize );
91
+ Indexer idxer (alphabetSize, seq->getKmerSize ());
105
92
Sequence s (seq->getMaxLen (), seq->getSeqType (), &subMat, seq->getKmerSize (), seq->isSpaced (), false , true , seq->getUserSpacedKmerPattern ());
106
93
107
94
KmerGenerator *generator = NULL ;
108
- if (isTargetSimiliarKmerSearch) {
95
+ if (isTargetSimiliarKmerSearch && indexTable != NULL ) {
109
96
generator = new KmerGenerator (seq->getKmerSize (), indexTable->getAlphabetSize (), kmerThr);
110
97
if (isProfile){
111
98
generator->setDivideStrategy (s.profile_matrix );
@@ -132,26 +119,21 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
132
119
// count similar or exact k-mers based on sequence type
133
120
if (isTargetSimiliarKmerSearch) {
134
121
// Find out if we should also mask profiles
135
- totalKmerCount += indexTable->addSimilarKmerCount (&s, generator);
136
- unsigned char * seq = (isProfile) ? s.numConsensusSequence : s.numSequence ;
137
- if (unmaskedLookup != NULL ) {
138
- (*unmaskedLookup)->addSequence (seq, s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
139
- } else if (maskedLookup != NULL ) {
140
- (*maskedLookup)->addSequence (seq, s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
122
+ if (indexTable != NULL ){
123
+ totalKmerCount += indexTable->addSimilarKmerCount (&s, generator);
141
124
}
125
+ unsigned char * seq = (isProfile) ? s.numConsensusSequence : s.numSequence ;
126
+
127
+ sequenceLookup->addSequence (seq, s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
128
+
142
129
} else {
143
130
// Do not mask if column state sequences are used
144
- if (unmaskedLookup != NULL ) {
145
- (*unmaskedLookup)->addSequence (s.numSequence , s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
146
- }
147
-
148
131
maskedResidues += masker->maskSequence (s, mask, maskProb, maskLowerCaseMode, maskNrepeats);
132
+ sequenceLookup->addSequence (s.numSequence , s.L , id - dbFrom, info->sequenceOffsets [id - dbFrom]);
149
133
150
- if (maskedLookup != NULL ){
151
- (*maskedLookup)-> addSequence (s. numSequence , s. L , id - dbFrom, info-> sequenceOffsets [id - dbFrom] );
134
+ if (indexTable != NULL ){
135
+ totalKmerCount += indexTable-> addKmerCount (&s, &idxer, buffer, kmerThr, idScoreLookup );
152
136
}
153
-
154
- totalKmerCount += indexTable->addKmerCount (&s, &idxer, buffer, kmerThr, idScoreLookup);
155
137
}
156
138
}
157
139
@@ -168,14 +150,13 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
168
150
169
151
170
152
Debug (Debug::INFO) << " Index table: Masked residues: " << maskedResidues << " \n " ;
171
- if (totalKmerCount == 0 ) {
172
- Debug (Debug::ERROR ) << " No k-mer could be extracted for the database " << dbr->getDataFileName () << " .\n "
153
+ if (indexTable != NULL && totalKmerCount == 0 ) {
154
+ Debug (Debug::WARNING ) << " No k-mer could be extracted for the database " << dbr->getDataFileName () << " .\n "
173
155
<< " Maybe the sequences length is less than 14 residues.\n " ;
174
156
if (maskedResidues == true ){
175
- Debug (Debug::ERROR ) << " or contains only low complexity regions." ;
176
- Debug (Debug::ERROR ) << " Use --mask 0 to deactivate the low complexity filter.\n " ;
157
+ Debug (Debug::WARNING ) << " or contains only low complexity regions." ;
158
+ Debug (Debug::WARNING ) << " Use --mask 0 to deactivate the low complexity filter.\n " ;
177
159
}
178
- EXIT (EXIT_FAILURE);
179
160
}
180
161
181
162
dbr->remapData ();
@@ -193,9 +174,10 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
193
174
// }
194
175
// Debug(Debug::INFO) << "Index table: Remove "<< lowSelectiveResidues <<" none selective residues\n";
195
176
// Debug(Debug::INFO) << "Index table: init... from "<< dbFrom << " to "<< dbTo << "\n";
196
-
197
- indexTable->initMemory (info->tableSize );
198
- indexTable->init ();
177
+ if (indexTable != NULL ){
178
+ indexTable->initMemory (info->tableSize );
179
+ indexTable->init ();
180
+ }
199
181
200
182
delete info;
201
183
Debug::Progress progress2 (dbTo-dbFrom);
@@ -208,7 +190,9 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
208
190
thread_idx = static_cast <unsigned int >(omp_get_thread_num ());
209
191
#endif
210
192
Sequence s (seq->getMaxLen (), seq->getSeqType (), &subMat, seq->getKmerSize (), seq->isSpaced (), false , true , seq->getUserSpacedKmerPattern ());
211
- Indexer idxer (static_cast <unsigned int >(indexTable->getAlphabetSize ()), seq->getKmerSize ());
193
+ unsigned int alphabetSize = (indexTable != NULL ) ? static_cast <unsigned int >(indexTable->getAlphabetSize ())
194
+ : static_cast <unsigned int >(subMat.alphabetSize );
195
+ Indexer idxer (alphabetSize, seq->getKmerSize ());
212
196
IndexEntryLocalTmp *buffer = static_cast <IndexEntryLocalTmp *>(malloc ( seq->getMaxLen () * sizeof (IndexEntryLocalTmp)));
213
197
size_t bufferSize = seq->getMaxLen ();
214
198
KmerGenerator *generator = NULL ;
@@ -229,10 +213,14 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
229
213
unsigned int qKey = dbr->getDbKey (id);
230
214
if (isTargetSimiliarKmerSearch) {
231
215
s.mapSequence (id - dbFrom, qKey, dbr->getData (id, thread_idx), dbr->getSeqLen (id));
232
- indexTable->addSimilarSequence (&s, generator, &buffer, bufferSize, &idxer);
216
+ if (indexTable != NULL ) {
217
+ indexTable->addSimilarSequence (&s, generator, &buffer, bufferSize, &idxer);
218
+ }
233
219
} else {
234
220
s.mapSequence (id - dbFrom, qKey, sequenceLookup->getSequence (id - dbFrom));
235
- indexTable->addSequence (&s, &idxer, &buffer, bufferSize, kmerThr, idScoreLookup);
221
+ if (indexTable != NULL ) {
222
+ indexTable->addSequence (&s, &idxer, &buffer, bufferSize, kmerThr, idScoreLookup);
223
+ }
236
224
}
237
225
}
238
226
@@ -245,6 +233,8 @@ void IndexBuilder::fillDatabase(IndexTable *indexTable, SequenceLookup **maskedL
245
233
if (idScoreLookup!=NULL ){
246
234
delete[] idScoreLookup;
247
235
}
248
- indexTable->revertPointer ();
249
- indexTable->sortDBSeqLists ();
236
+ if (indexTable != NULL ){
237
+ indexTable->revertPointer ();
238
+ indexTable->sortDBSeqLists ();
239
+ }
250
240
}
0 commit comments