4
4
#include " DBWriter.h"
5
5
#include " Matcher.h"
6
6
#include " Util.h"
7
- #include " itoa.h"
8
-
7
+ #include " TranslateNucl.h"
9
8
#include " Orf.h"
10
9
11
10
#include < unistd.h>
16
15
#include < omp.h>
17
16
#endif
18
17
18
+ void handleSingleFrame (TranslateNucl& translateNucl, DBWriter& sequenceWriter, DBWriter& headerWriter, unsigned int key, char * headerBuffer, const char * data, size_t seqLen, int frame, bool reverse, bool translate, char *& aaBuffer, size_t & aaBufferSize, int thread_idx) {
19
+ data = data + frame;
20
+ seqLen = seqLen - frame;
21
+ if (translate == true ) {
22
+ if (seqLen < 3 ) {
23
+ return ;
24
+ }
25
+ size_t codonLength = (seqLen / 3 ) * 3 ;
26
+ if ((codonLength + 1 ) > aaBufferSize) {
27
+ aaBufferSize = codonLength * 1.5 + 1 ;
28
+ aaBuffer = (char *)realloc (aaBuffer, aaBufferSize * sizeof (char ));
29
+ }
30
+ translateNucl.translate (aaBuffer, data, codonLength);
31
+ aaBuffer[codonLength / 3 ] = ' \n ' ;
32
+ sequenceWriter.writeData (aaBuffer, (codonLength / 3 ) + 1 , key, thread_idx);
33
+ size_t bufferLen;
34
+ if (reverse) {
35
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, frame + codonLength, static_cast <size_t >(frame), 0 , 0 );
36
+ } else {
37
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, static_cast <size_t >(frame), frame + codonLength, 0 , 0 );
38
+ }
39
+ headerWriter.writeData (headerBuffer, bufferLen, key, thread_idx);
40
+ } else {
41
+ // +1: add newline, but remove it from the end pos
42
+ sequenceWriter.writeData (data, seqLen + 1 , key, thread_idx);
43
+ size_t bufferLen;
44
+ if (reverse) {
45
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, seqLen - 1 , static_cast <size_t >(frame), 0 , 0 );
46
+ } else {
47
+ bufferLen = Orf::writeOrfHeader (headerBuffer, key, static_cast <size_t >(frame), seqLen - 1 , 0 , 0 );
48
+ }
49
+ headerWriter.writeData (headerBuffer, bufferLen, key, thread_idx);
50
+ }
51
+ }
52
+
19
53
int extractframes (int argc, const char **argv, const Command& command) {
20
54
Parameters& par = Parameters::getInstance ();
21
55
par.parseParameters (argc, argv, command, true , 0 , 0 );
22
56
23
57
DBReader<unsigned int > reader (par.db1 .c_str (), par.db1Index .c_str (), par.threads , DBReader<unsigned int >::USE_INDEX|DBReader<unsigned int >::USE_DATA);
24
58
reader.open (DBReader<unsigned int >::NOSORT);
25
59
26
- DBWriter sequenceWriter (par.db2 .c_str (), par.db2Index .c_str (), par.threads , par.compressed , reader.getDbtype ());
60
+ int outputDbtype = reader.getDbtype ();
61
+ if (par.translate ) {
62
+ outputDbtype = Parameters::DBTYPE_AMINO_ACIDS;
63
+ }
64
+ DBWriter sequenceWriter (par.db2 .c_str (), par.db2Index .c_str (), par.threads , par.compressed , outputDbtype);
27
65
sequenceWriter.open ();
28
66
29
67
DBWriter headerWriter (par.hdr2 .c_str (), par.hdr2Index .c_str (), par.threads , false , Parameters::DBTYPE_GENERIC_DB);
30
68
headerWriter.open ();
31
69
32
70
unsigned int forwardFrames = Orf::getFrames (par.forwardFrames );
33
71
unsigned int reverseFrames = Orf::getFrames (par.reverseFrames );
34
- Debug::Progress progress (reader.getSize ());
35
72
73
+ Debug::Progress progress (reader.getSize ());
74
+ TranslateNucl translateNucl (static_cast <TranslateNucl::GenCode>(par.translationTable ));
36
75
#pragma omp parallel
37
76
{
38
77
int thread_idx = 0 ;
@@ -46,70 +85,66 @@ int extractframes(int argc, const char **argv, const Command& command) {
46
85
queryFrom = 0 ;
47
86
}
48
87
88
+ size_t aaBufferSize = par.maxSeqLen + 3 + 1 ;
89
+ char * aa = NULL ;
90
+ if (par.translate == true ) {
91
+ aa = (char *)malloc (aaBufferSize * sizeof (char ));
92
+ }
93
+
49
94
char buffer[1024 ];
95
+
50
96
std::string reverseComplementStr;
51
97
reverseComplementStr.reserve (32000 );
98
+
52
99
for (unsigned int i = queryFrom; i < (queryFrom + querySize); ++i){
53
100
progress.updateProgress ();
54
101
55
102
unsigned int key = reader.getDbKey (i);
56
103
const char * data = reader.getData (i, thread_idx);
57
- size_t dataLength = reader.getEntryLen (i);
58
-
59
- size_t bufferLen;
60
- switch (forwardFrames){
61
- case Orf::FRAME_1:
62
- // -1 to ignore the null byte copy the new line
63
- sequenceWriter.writeData (data, dataLength - 1 , key, thread_idx);
64
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(0 ), dataLength - 3 , 0 , 0 );
65
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
66
- break ;
67
- case Orf::FRAME_2:
68
- sequenceWriter.writeData (data + 1 , dataLength - 2 , key, thread_idx);
69
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(1 ), dataLength - 4 , 0 , 0 );
70
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
71
- break ;
72
- case Orf::FRAME_3:
73
- sequenceWriter.writeData (data + 2 , dataLength - 3 , key, thread_idx);
74
- bufferLen = Orf::writeOrfHeader (buffer, key, static_cast <size_t >(2 ), dataLength - 5 , 0 , 0 );
75
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
76
- break ;
104
+ size_t seqLen = reader.getSeqLen (i);
105
+
106
+ if (forwardFrames & Orf::FRAME_1) {
107
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 0 , false , par.translate , aa, aaBufferSize, thread_idx);
108
+ }
109
+ if (forwardFrames & Orf::FRAME_2) {
110
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 1 , false , par.translate , aa, aaBufferSize, thread_idx);
111
+ }
112
+ if (forwardFrames & Orf::FRAME_3) {
113
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 2 , false , par.translate , aa, aaBufferSize, thread_idx);
77
114
}
78
115
79
- if (reverseFrames != 0 ){
80
- size_t sequenceLength = dataLength -2 ;
116
+ if (reverseFrames != 0 ) {
81
117
// bool hasWrongChar = false;
82
- for (size_t pos = 0 ; pos < sequenceLength ; ++pos) {
83
- char reverseComplement = Orf::complement (data[sequenceLength - pos - 1 ]);
118
+ for (size_t pos = 0 ; pos < seqLen ; ++pos) {
119
+ char reverseComplement = Orf::complement (data[seqLen - pos - 1 ]);
84
120
reverseComplement = (reverseComplement == ' .' ) ? ' N' : reverseComplement;
85
121
reverseComplementStr.push_back (reverseComplement);
86
122
// hasWrongChar |= (reverseComplement == '.');
87
123
}
88
- // if (hasWrongChar == true){
89
- // continue;
90
- // }
124
+ // if (hasWrongChar == true) {
125
+ // continue;
126
+ // }
91
127
reverseComplementStr.push_back (' \n ' );
128
+ seqLen = reverseComplementStr.size () - 1 ;
129
+ data = reverseComplementStr.c_str ();
92
130
}
93
131
94
- switch (reverseFrames){
95
- case Orf::FRAME_1:
96
- sequenceWriter.writeData (reverseComplementStr.c_str (), reverseComplementStr.size (), key, thread_idx);
97
- bufferLen = Orf::writeOrfHeader (buffer, key, reverseComplementStr.size () - 2 , static_cast <size_t >(0 ), 0 , 0 );
98
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
99
- break ;
100
- case Orf::FRAME_2:
101
- sequenceWriter.writeData (reverseComplementStr.c_str ()+1 , reverseComplementStr.size ()-1 , key, thread_idx);
102
- bufferLen = Orf::writeOrfHeader (buffer, key, reverseComplementStr.size () - 3 , static_cast <size_t >(1 ), 0 , 0 );
103
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
104
- break ;
105
- case Orf::FRAME_3:
106
- sequenceWriter.writeData (reverseComplementStr.c_str ()+2 , reverseComplementStr.size ()-2 , key, thread_idx);
107
- bufferLen = Orf::writeOrfHeader (buffer, key, reverseComplementStr.size () - 4 , static_cast <size_t >(2 ), 0 , 0 );
108
- headerWriter.writeData (buffer, bufferLen, key, thread_idx);
109
- break ;
132
+ if (reverseFrames & Orf::FRAME_1) {
133
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 0 , true , par.translate , aa, aaBufferSize, thread_idx);
134
+ }
135
+
136
+ if (reverseFrames & Orf::FRAME_2) {
137
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 1 , true , par.translate , aa, aaBufferSize, thread_idx);
138
+ }
139
+
140
+ if (reverseFrames & Orf::FRAME_3) {
141
+ handleSingleFrame (translateNucl, sequenceWriter, headerWriter, key, buffer, data, seqLen, 2 , true , par.translate , aa, aaBufferSize, thread_idx);
110
142
}
111
143
reverseComplementStr.clear ();
112
144
}
145
+ if (aa != NULL ) {
146
+ free (aa);
147
+ }
113
148
}
114
149
headerWriter.close (true );
115
150
sequenceWriter.close (true );
0 commit comments