@@ -62,6 +62,41 @@ char byte2base(uint8_t byte, int offset) {
62
62
return bases [foo ];
63
63
}
64
64
65
+ void bytes2bases (char * seq , uint8_t * byte , uint32_t sz , int offset ) {
66
+ uint32_t pos = 0 , remainder = 0 , i = 0 ;
67
+ char bases [4 ] = "TCAG" ;
68
+ uint8_t foo = byte [0 ];
69
+
70
+ // Deal with the first partial byte
71
+ if (offset != 0 ) {
72
+ while (offset < 4 ) {
73
+ seq [pos ++ ] = byte2base (foo , offset ++ );
74
+ }
75
+ foo = byte [++ i ];
76
+ }
77
+
78
+ // Deal with everything else, with the possible exception of the last fractional byte
79
+ remainder = (sz - pos ) % 4 ;
80
+ while (pos < sz - remainder ) {
81
+ foo = byte [i ++ ];
82
+ seq [pos + 3 ] = bases [foo & 3 ];
83
+ foo >>= 2 ;
84
+ seq [pos + 2 ] = bases [foo & 3 ];
85
+ foo >>= 2 ;
86
+ seq [pos + 1 ] = bases [foo & 3 ];
87
+ foo >>= 2 ;
88
+ seq [pos ] = bases [foo & 3 ];
89
+ foo >>= 2 ;
90
+ pos += 4 ;
91
+ }
92
+
93
+ // Deal with the last partial byte
94
+ if (remainder > 0 ) foo = byte [i ];
95
+ for (offset = 0 ; offset < remainder ; offset ++ ) {
96
+ seq [pos ++ ] = byte2base (foo , offset );
97
+ }
98
+ }
99
+
65
100
/*
66
101
Replace Ts (or whatever else is being used) with N as appropriate
67
102
*/
@@ -122,25 +157,23 @@ void softMask(char *seq, TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end)
122
157
This is the worker function for twobitSequence, which mostly does error checking
123
158
*/
124
159
char * constructSequence (TwoBit * tb , uint32_t tid , uint32_t start , uint32_t end ) {
125
- uint32_t sz = end - start + 1 , pos = 0 ;
126
- uint32_t blockStart , offset ;
127
- char * seq = malloc (sz * sizeof (char )), byte ;
160
+ uint32_t sz = end - start + 1 ;
161
+ uint32_t blockStart , blockEnd , offset ;
162
+ char * seq = malloc (sz * sizeof (char ));
163
+ uint8_t * bytes = NULL ;
128
164
if (!seq ) return NULL ;
129
165
130
166
//There are 4 bases/byte
131
167
blockStart = start /4 ;
132
168
offset = start % 4 ;
169
+ blockEnd = end /4 + ((end % 4 ) ? 1 : 0 );
170
+ bytes = malloc (blockEnd - blockStart );
171
+ if (!bytes ) goto error ;
133
172
134
173
if (twobitSeek (tb , tb -> idx -> offset [tid ] + blockStart ) != 0 ) goto error ;
135
- while (pos < sz - 1 ) {
136
- if (twobitRead (& byte , 1 , 1 , tb ) != 1 ) goto error ;
137
-
138
- for (; offset < 4 ; offset ++ ) {
139
- seq [pos ++ ] = byte2base (byte , offset );
140
- if (pos >= sz - 1 ) break ;
141
- }
142
- offset = 0 ;
143
- }
174
+ if (twobitRead (bytes , blockEnd - blockStart , 1 , tb ) != 1 ) goto error ;
175
+ bytes2bases (seq , bytes , sz - 1 , offset );
176
+ free (bytes );
144
177
145
178
//Null terminate the output
146
179
seq [sz - 1 ] = '\0' ;
@@ -155,6 +188,7 @@ char *constructSequence(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end)
155
188
156
189
error :
157
190
if (seq ) free (seq );
191
+ if (bytes ) free (bytes );
158
192
return NULL ;
159
193
}
160
194
0 commit comments