Skip to content

Commit a0b96d2

Browse files
committed
Speed things up under normal usage
1 parent 448a439 commit a0b96d2

File tree

2 files changed

+47
-13
lines changed

2 files changed

+47
-13
lines changed

2bit.c

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,41 @@ char byte2base(uint8_t byte, int offset) {
6262
return bases[foo];
6363
}
6464

65+
/*
    Decode 2-bit packed bases into characters.

    seq:    Output buffer; exactly sz characters are written to it. No NUL
            terminator is added, that is left to the caller.
    byte:   Packed input, 4 bases per byte. Within a byte the first base is
            held in the two most significant bits.
    sz:     The number of bases to decode.
    offset: Index (0-3) of the first base to use within byte[0].

    Values 0, 1, 2 and 3 decode to 'T', 'C', 'A' and 'G', respectively.
*/
void bytes2bases(char *seq, uint8_t *byte, uint32_t sz, int offset) {
    uint32_t pos = 0, remainder = 0, i = 0;
    char bases[4] = "TCAG";
    uint8_t foo;

    // Nothing was requested, so neither buffer may be touched
    if(sz == 0) return;

    // Deal with the first partial byte. The loop is also bounded by sz,
    // otherwise a request that ends inside this byte would overrun seq and
    // leave pos > sz, which makes the unsigned arithmetic below wrap around.
    if(offset != 0) {
        foo = byte[i++];
        while(offset < 4 && pos < sz) {
            seq[pos++] = byte2base(foo, offset++);
        }
    }

    // Deal with everything else, with the possible exception of the last fractional byte
    // pos <= sz holds here, so neither subtraction can underflow
    remainder = (sz - pos) % 4;
    while(pos < sz - remainder) {
        foo = byte[i++];
        // The lowest two bits hold the last of the four bases, so fill backwards
        seq[pos + 3] = bases[foo & 3];
        foo >>= 2;
        seq[pos + 2] = bases[foo & 3];
        foo >>= 2;
        seq[pos + 1] = bases[foo & 3];
        foo >>= 2;
        seq[pos] = bases[foo & 3];
        pos += 4;
    }

    // Deal with the last partial byte, which is only read if it's actually needed
    if(remainder > 0) {
        foo = byte[i];
        for(offset = 0; (uint32_t) offset < remainder; offset++) {
            seq[pos++] = byte2base(foo, offset);
        }
    }
}
99+
65100
/*
66101
Replace Ts (or whatever else is being used) with N as appropriate
67102
*/
@@ -122,25 +157,23 @@ void softMask(char *seq, TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end)
122157
This is the worker function for twobitSequence, which mostly does error checking
123158
*/
124159
char *constructSequence(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end) {
125-
uint32_t sz = end - start + 1, pos = 0;
126-
uint32_t blockStart, offset;
127-
char *seq = malloc(sz * sizeof(char)), byte;
160+
uint32_t sz = end - start + 1;
161+
uint32_t blockStart, blockEnd, offset;
162+
char *seq = malloc(sz * sizeof(char));
163+
uint8_t *bytes = NULL;
128164
if(!seq) return NULL;
129165

130166
//There are 4 bases/byte
131167
blockStart = start/4;
132168
offset = start % 4;
169+
blockEnd = end/4 + ((end % 4) ? 1 : 0);
170+
bytes = malloc(blockEnd - blockStart);
171+
if(!bytes) goto error;
133172

134173
if(twobitSeek(tb, tb->idx->offset[tid] + blockStart) != 0) goto error;
135-
while(pos < sz - 1) {
136-
if(twobitRead(&byte, 1, 1, tb) != 1) goto error;
137-
138-
for(; offset<4; offset++) {
139-
seq[pos++] = byte2base(byte, offset);
140-
if(pos >= sz - 1) break;
141-
}
142-
offset = 0;
143-
}
174+
if(twobitRead(bytes, blockEnd - blockStart, 1, tb) != 1) goto error;
175+
bytes2bases(seq, bytes, sz - 1, offset);
176+
free(bytes);
144177

145178
//Null terminate the output
146179
seq[sz - 1] = '\0';
@@ -155,6 +188,7 @@ char *constructSequence(TwoBit *tb, uint32_t tid, uint32_t start, uint32_t end)
155188

156189
error:
157190
if(seq) free(seq);
191+
if(bytes) free(bytes);
158192
return NULL;
159193
}
160194

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
CC ?= gcc
22
AR ?= ar
33
RANLIB ?= ranlib
4-
CFLAGS ?= -g -Wall -O0
4+
CFLAGS ?= -g -Wall -O3
55
LIBS =
66
EXTRA_CFLAGS_PIC = -fpic
77
LDFLAGS =

0 commit comments

Comments
 (0)