5
5
#include " Matcher.h"
6
6
#include " Util.h"
7
7
#include " TranslateNucl.h"
8
- #include " itoa.h"
9
-
10
8
#include " Orf.h"
11
9
12
10
#include < unistd.h>
17
15
#include < omp.h>
18
16
#endif
19
17
18
+ void handleSingleFrame (TranslateNucl& translateNucl, DBWriter& sequenceWriter, DBWriter& headerWriter, unsigned int key, char * headerBuffer, const char * data, size_t seqLen, int frame, bool reverse, bool translate, char *& aaBuffer, size_t & aaBufferSize, int thread_idx) {
19
+ data = data + frame;
20
+ seqLen = seqLen - frame;
21
+ if (translate == true ) {
22
+ if (seqLen < 3 ) {
23
+ return ;
24
+ }
25
+ size_t codonLength = (seqLen / 3 ) * 3 ;
26
+ if ((codonLength + 1 ) > aaBufferSize) {
27
+ aaBufferSize = codonLength * 1.5 + 1 ;
28
+ aaBuffer = (char *)realloc (aaBuffer, aaBufferSize * sizeof (char ));
29
+ }
30
+ translateNucl.translate (aaBuffer, data, codonLength);
31
+ aaBuffer[codonLength / 3 ] = ' \n ' ;
32
+ sequenceWriter.writeData (aaBuffer, (codonLength / 3 ) + 1 , key, thread_idx);
33
+ size_t bufferLen;
34
+ if (reverse) {
35
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, frame + codonLength, static_cast <size_t >(frame), 0 , 0 );
36
+ } else {
37
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, static_cast <size_t >(frame), frame + codonLength, 0 , 0 );
38
+ }
39
+ headerWriter.writeData (headerBuffer, bufferLen, key, thread_idx);
40
+ } else {
41
+ // +1: add newline, but remove it from the end pos
42
+ sequenceWriter.writeData (data, seqLen + 1 , key, thread_idx);
43
+ size_t bufferLen;
44
+ if (reverse) {
45
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, seqLen - 1 , static_cast <size_t >(frame), 0 , 0 );
46
+ } else {
47
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, static_cast <size_t >(frame), seqLen - 1 , 0 , 0 );
48
+ }
49
+ headerWriter.writeData (headerBuffer, bufferLen, key, thread_idx);
50
+ }
51
+ }
52
+
20
53
int extractframes (int argc, const char **argv, const Command& command) {
21
54
Parameters& par = Parameters::getInstance ();
22
55
par.parseParameters (argc, argv, command, true , 0 , 0 );
@@ -25,7 +58,7 @@ int extractframes(int argc, const char **argv, const Command& command) {
25
58
reader.open (DBReader<unsigned int >::NOSORT);
26
59
27
60
int outputDbtype = reader.getDbtype ();
28
- if (par.translate ) {
61
+ if (par.translate ) {
29
62
outputDbtype = Parameters::DBTYPE_AMINO_ACIDS;
30
63
}
31
64
DBWriter sequenceWriter (par.db2 .c_str (), par.db2Index .c_str (), par.threads , par.compressed , outputDbtype);
@@ -52,157 +85,66 @@ int extractframes(int argc, const char **argv, const Command& command) {
52
85
queryFrom = 0 ;
53
86
}
54
87
55
- char * aa = new char [par.maxSeqLen + 3 + 1 ];
88
+ size_t aaBufferSize = par.maxSeqLen + 3 + 1 ;
89
+ char * aa = NULL ;
90
+ if (par.translate == true ) {
91
+ aa = (char *)malloc (aaBufferSize * sizeof (char ));
92
+ }
93
+
56
94
char buffer[1024 ];
95
+
57
96
std::string reverseComplementStr;
58
97
reverseComplementStr.reserve (32000 );
98
+
59
99
for (unsigned int i = queryFrom; i < (queryFrom + querySize); ++i){
60
100
progress.updateProgress ();
61
101
62
102
unsigned int key = reader.getDbKey (i);
63
103
const char * data = reader.getData (i, thread_idx);
64
104
size_t seqLen = reader.getSeqLen (i);
65
105
66
- size_t bufferLen;
67
106
if (forwardFrames & Orf::FRAME_1) {
68
- if (par.translate ) {
69
- size_t currSeqLen = seqLen + 1 ;
70
- if (currSeqLen >= 3 ) {
71
- if (currSeqLen > (3 * par.maxSeqLen )) {
72
- currSeqLen = (3 * par.maxSeqLen );
73
- }
74
- size_t condonLength = currSeqLen / 3 * 3 ;
75
- translateNucl.translate (aa, data, condonLength);
76
- sequenceWriter.writeData (aa, (condonLength / 3 ), key, thread_idx);
77
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(0 ), seqLen - 1 , 0 , 0 );
78
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
79
- }
80
- } else {
81
- sequenceWriter.writeData (data, seqLen + 1 , key, thread_idx);
82
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(0 ), seqLen - 1 , 0 , 0 );
83
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
84
- }
107
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 0 , false , par.translate , aa, aaBufferSize, thread_idx);
85
108
}
86
109
if (forwardFrames & Orf::FRAME_2) {
87
- if (par.translate ) {
88
- size_t currSeqLen = seqLen;
89
- if (currSeqLen >= 3 ) {
90
- if (currSeqLen > (3 * par.maxSeqLen )) {
91
- currSeqLen = (3 * par.maxSeqLen );
92
- }
93
- size_t condonLength = currSeqLen / 3 * 3 ;
94
- translateNucl.translate (aa, data + 1 , condonLength);
95
- sequenceWriter.writeData (aa, (condonLength / 3 ), key, thread_idx);
96
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(1 ), seqLen - 2 , 0 , 0 );
97
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
98
- }
99
- } else {
100
- sequenceWriter.writeData (data + 1 , seqLen, key, thread_idx);
101
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(1 ), seqLen - 2 , 0 , 0 );
102
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
103
- }
110
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 1 , false , par.translate , aa, aaBufferSize, thread_idx);
104
111
}
105
112
if (forwardFrames & Orf::FRAME_3) {
106
- if (par.translate ) {
107
- size_t currSeqLen = seqLen - 1 ;
108
- if (currSeqLen >= 3 ) {
109
- if (currSeqLen > (3 * par.maxSeqLen )) {
110
- currSeqLen = (3 * par.maxSeqLen );
111
- }
112
- size_t condonLength = currSeqLen / 3 * 3 ;
113
- translateNucl.translate (aa, data + 2 , condonLength);
114
- sequenceWriter.writeData (aa, (condonLength / 3 ), key, thread_idx);
115
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(2 ), seqLen - 3 , 0 , 0 );
116
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
117
- }
118
- } else {
119
- sequenceWriter.writeData (data + 2 , seqLen - 1 , key, thread_idx);
120
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(2 ), seqLen - 3 , 0 , 0 );
121
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
122
- }
113
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 2 , false , par.translate , aa, aaBufferSize, thread_idx);
123
114
}
124
115
125
-
126
- if (reverseFrames != 0 ){
127
- size_t sequenceLength = seqLen;
116
+ if (reverseFrames != 0 ) {
128
117
// bool hasWrongChar = false;
129
- for (size_t pos = 0 ; pos < sequenceLength ; ++pos) {
130
- char reverseComplement = Orf::complement (data[sequenceLength - pos - 1 ]);
118
+ for (size_t pos = 0 ; pos < seqLen ; ++pos) {
119
+ char reverseComplement = Orf::complement (data[seqLen - pos - 1 ]);
131
120
reverseComplement = (reverseComplement == ' .' ) ? ' N' : reverseComplement;
132
121
reverseComplementStr.push_back (reverseComplement);
133
122
// hasWrongChar |= (reverseComplement == '.');
134
123
}
135
- // if (hasWrongChar == true){
136
- // continue;
137
- // }
124
+ // if (hasWrongChar == true) {
125
+ // continue;
126
+ // }
138
127
reverseComplementStr.push_back (' \n ' );
139
-
140
- seqLen = reverseComplementStr.size ();
128
+ seqLen = reverseComplementStr.size () - 1 ;
141
129
data = reverseComplementStr.c_str ();
142
130
}
143
131
144
132
if (reverseFrames & Orf::FRAME_1) {
145
- if (par.translate ) {
146
- size_t currSeqLen = seqLen;
147
- if (currSeqLen >= 3 ) {
148
- if (currSeqLen > (3 * par.maxSeqLen )) {
149
- currSeqLen = (3 * par.maxSeqLen );
150
- }
151
- size_t condonLength = currSeqLen / 3 * 3 ;
152
- translateNucl.translate (aa, data, condonLength);
153
- sequenceWriter.writeData (aa, (condonLength / 3 ), key, thread_idx);
154
- bufferLen = Orf::writeOrfHeader (buffer, key, seqLen - 2 , static_cast <size_t >(0 ), 0 , 0 );
155
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
156
- }
157
- } else {
158
- sequenceWriter.writeData (data, seqLen, key, thread_idx);
159
- bufferLen = Orf::writeOrfHeader (buffer, key, seqLen - 2 , static_cast <size_t >(0 ), 0 , 0 );
160
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
161
- }
133
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 0 , true , par.translate , aa, aaBufferSize, thread_idx);
162
134
}
163
135
164
136
if (reverseFrames & Orf::FRAME_2) {
165
- if (par.translate ) {
166
- size_t currSeqLen = seqLen - 1 ;
167
- if (currSeqLen >= 3 ) {
168
- if (currSeqLen > (3 * par.maxSeqLen )) {
169
- currSeqLen = (3 * par.maxSeqLen );
170
- }
171
- size_t condonLength = currSeqLen / 3 * 3 ;
172
- translateNucl.translate (aa, data + 1 , condonLength);
173
- sequenceWriter.writeData (aa, (condonLength / 3 ), key, thread_idx);
174
- bufferLen = Orf::writeOrfHeader (buffer, key, seqLen - 3 , static_cast <size_t >(1 ), 0 , 0 );
175
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
176
- }
177
- } else {
178
- sequenceWriter.writeData (data + 1 , seqLen - 1 , key, thread_idx);
179
- bufferLen = Orf::writeOrfHeader (buffer, key, seqLen - 3 , static_cast <size_t >(1 ), 0 , 0 );
180
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
181
- }
137
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 1 , true , par.translate , aa, aaBufferSize, thread_idx);
182
138
}
183
139
184
140
if (reverseFrames & Orf::FRAME_3) {
185
- if (par.translate ) {
186
- size_t currSeqLen = seqLen - 2 ;
187
- if (currSeqLen >= 3 ) {
188
- if (currSeqLen > (3 * par.maxSeqLen )) {
189
- currSeqLen = (3 * par.maxSeqLen );
190
- }
191
- size_t condonLength = currSeqLen / 3 * 3 ;
192
- translateNucl.translate (aa, data + 2 , condonLength);
193
- sequenceWriter.writeData (aa, (condonLength / 3 ), key, thread_idx);
194
- bufferLen = Orf::writeOrfHeader (buffer, key, seqLen - 4 , static_cast <size_t >(2 ), 0 , 0 );
195
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
196
- }
197
- } else {
198
- sequenceWriter.writeData (data + 2 , seqLen - 2 , key, thread_idx);
199
- bufferLen = Orf::writeOrfHeader (buffer, key, seqLen - 4 , static_cast <size_t >(2 ), 0 , 0 );
200
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
201
- }
141
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 2 , true , par.translate , aa, aaBufferSize, thread_idx);
202
142
}
203
143
reverseComplementStr.clear ();
204
144
}
205
- delete[] aa;
145
+ if (aa != NULL ) {
146
+ free (aa);
147
+ }
206
148
}
207
149
headerWriter.close (true );
208
150
sequenceWriter.close (true );
0 commit comments