Skip to content

Commit 2233b02

Browse files
author
yash-puligundla
committed
Addressing format related feedback from RANS PR that applies to Range Codec as well
1 parent 71b41f1 commit 2233b02

File tree

4 files changed

+158
-149
lines changed

4 files changed

+158
-149
lines changed

src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public class RangeCoder {
1111
private boolean carry;
1212
private int cache;
1313

14-
public RangeCoder() {
14+
protected RangeCoder() {
1515
// Spec: RangeEncodeStart
1616
this.low = 0;
1717
this.range = 0xFFFFFFFFL; // 4 bytes of all 1's
@@ -21,15 +21,15 @@ public RangeCoder() {
2121
this.cache = 0;
2222
}
2323

24-
public void rangeDecodeStart(ByteBuffer inBuffer){
24+
protected void rangeDecodeStart(ByteBuffer inBuffer){
2525
for (int i = 0; i < 5; i++){
2626

2727
// Get next 5 bytes. Ensure it is +ve
2828
code = (code << 8) + (inBuffer.get() & 0xFF);
2929
}
3030
}
3131

32-
public void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){
32+
protected void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){
3333
code -= sym_low * range;
3434
range *= sym_freq;
3535

@@ -39,12 +39,38 @@ public void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_
3939
}
4040
}
4141

42-
public int rangeGetFrequency(final int tot_freq){
42+
protected int rangeGetFrequency(final int tot_freq){
4343
range = (long) Math.floor(range / tot_freq);
4444
return (int) Math.floor(code / range);
4545
}
4646

47-
public void rangeShiftLow(ByteBuffer outBuffer) {
47+
protected void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){
48+
long old_low = low;
49+
range = (long) Math.floor(range/tot_freq);
50+
low += sym_low * range;
51+
low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low
52+
range *= sym_freq;
53+
54+
if (low < old_low) {
55+
carry = true;
56+
}
57+
58+
// Renormalise if range gets too small
59+
while (range < (1<<24)) {
60+
range <<= 8;
61+
rangeShiftLow(outBuffer);
62+
}
63+
64+
}
65+
66+
protected void rangeEncodeEnd(final ByteBuffer outBuffer){
67+
//TODO: Where is the magic number 5 coming from?
68+
for(int i = 0; i < 5; i++){
69+
rangeShiftLow(outBuffer);
70+
}
71+
}
72+
73+
private void rangeShiftLow(ByteBuffer outBuffer) {
4874
// rangeShiftLow tracks the total number of extra bytes to emit and
4975
// carry indicates whether they are a string of 0xFF or 0x00 values
5076

@@ -74,30 +100,4 @@ public void rangeShiftLow(ByteBuffer outBuffer) {
74100
low = low<<8 & (0xFFFFFFFFL); // force low to be +ve
75101
}
76102

77-
public void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){
78-
long old_low = low;
79-
range = (long) Math.floor(range/tot_freq);
80-
low += sym_low * range;
81-
low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low
82-
range *= sym_freq;
83-
84-
if (low < old_low) {
85-
carry = true;
86-
}
87-
88-
// Renormalise if range gets too small
89-
while (range < (1<<24)) {
90-
range <<= 8;
91-
rangeShiftLow(outBuffer);
92-
}
93-
94-
}
95-
96-
public void rangeEncodeEnd(final ByteBuffer outBuffer){
97-
//TODO: Where is the magic number 5 coming from?
98-
for(int i = 0; i < 5; i++){
99-
rangeShiftLow(outBuffer);
100-
}
101-
}
102-
103103
}

src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import htsjdk.samtools.cram.compression.rans.Utils;
66

77
import java.nio.ByteBuffer;
8+
import java.nio.ByteOrder;
89
import java.util.ArrayList;
910
import java.util.List;
1011

@@ -13,17 +14,15 @@ public class RangeDecode {
1314
private static final ByteBuffer EMPTY_BUFFER = ByteBuffer.allocate(0);
1415

1516
public ByteBuffer uncompress(final ByteBuffer inBuffer) {
17+
inBuffer.order(ByteOrder.LITTLE_ENDIAN);
1618
return uncompress(inBuffer, 0);
1719
}
1820

19-
public ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
21+
private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
2022
if (inBuffer.remaining() == 0) {
2123
return EMPTY_BUFFER;
2224
}
2325

24-
// TODO: little endian?
25-
// inBuffer.order(ByteOrder.LITTLE_ENDIAN);
26-
2726
// the first byte of compressed stream gives the formatFlags
2827
final int formatFlags = inBuffer.get() & 0xFF;
2928
final RangeParams rangeParams = new RangeParams(formatFlags);

src/main/java/htsjdk/samtools/cram/compression/range/RangeEncode.java

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,14 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar
1616
return EMPTY_BUFFER;
1717
}
1818

19-
ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining());
19+
final ByteBuffer outBuffer = allocateOutputBuffer(inBuffer.remaining());
2020
outBuffer.order(ByteOrder.BIG_ENDIAN);
2121
final int formatFlags = rangeParams.getFormatFlags();
2222
outBuffer.put((byte) (formatFlags));
2323

2424
if (!rangeParams.isNosz()) {
2525
// original size is not recorded
26-
int insize = inBuffer.remaining();
27-
Utils.writeUint7(insize,outBuffer);
26+
Utils.writeUint7(inBuffer.remaining(),outBuffer);
2827
}
2928

3029
ByteBuffer inputBuffer = inBuffer;
@@ -68,7 +67,7 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar
6867
outBuffer.rewind(); // set position to 0
6968
return outBuffer;
7069
} else if (rangeParams.isExternalCompression()){
71-
byte[] rawBytes = new byte[inputBuffer.remaining()];
70+
final byte[] rawBytes = new byte[inputBuffer.remaining()];
7271
inputBuffer.get( rawBytes,inBuffer.position(), inputBuffer.remaining());
7372
final BZIP2ExternalCompressor compressor = new BZIP2ExternalCompressor();
7473
final byte [] extCompressedBytes = compressor.compress(rawBytes);
@@ -79,16 +78,16 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar
7978
} else if (rangeParams.isRLE()){
8079
switch (rangeParams.getOrder()) {
8180
case ZERO:
82-
return compressRLEOrder0(inputBuffer, rangeParams, outBuffer);
81+
return compressRLEOrder0(inputBuffer, outBuffer);
8382
case ONE:
84-
return compressRLEOrder1(inputBuffer, rangeParams, outBuffer);
83+
return compressRLEOrder1(inputBuffer, outBuffer);
8584
}
8685
} else {
8786
switch (rangeParams.getOrder()) {
8887
case ZERO:
89-
return compressOrder0(inputBuffer, rangeParams, outBuffer);
88+
return compressOrder0(inputBuffer, outBuffer);
9089
case ONE:
91-
return compressOrder1(inputBuffer, rangeParams, outBuffer);
90+
return compressOrder1(inputBuffer, outBuffer);
9291
}
9392

9493
}
@@ -97,7 +96,6 @@ public ByteBuffer compress(final ByteBuffer inBuffer, final RangeParams rangePar
9796

9897
private ByteBuffer compressOrder0 (
9998
final ByteBuffer inBuffer,
100-
final RangeParams rangeParams,
10199
final ByteBuffer outBuffer) {
102100

103101
int maxSymbol = 0;
@@ -110,9 +108,9 @@ private ByteBuffer compressOrder0 (
110108
maxSymbol++; // TODO: Is this correct? Not what spec states!!
111109

112110
// TODO: initialize byteModel -> set and reset symbols?
113-
ByteModel byteModel = new ByteModel(maxSymbol);
111+
final ByteModel byteModel = new ByteModel(maxSymbol);
114112
outBuffer.put((byte) maxSymbol);
115-
RangeCoder rangeCoder = new RangeCoder();
113+
final RangeCoder rangeCoder = new RangeCoder();
116114
for (int i = 0; i < inSize; i++){
117115
byteModel.modelEncode(outBuffer,rangeCoder,inBuffer.get(i)&0xFF);
118116
}
@@ -124,7 +122,6 @@ private ByteBuffer compressOrder0 (
124122

125123
private ByteBuffer compressOrder1 (
126124
final ByteBuffer inBuffer,
127-
final RangeParams rangeParams,
128125
final ByteBuffer outBuffer) {
129126
int maxSymbol = 0;
130127
final int inSize = inBuffer.remaining();
@@ -145,7 +142,7 @@ private ByteBuffer compressOrder1 (
145142
outBuffer.put((byte) maxSymbol);
146143

147144
// TODO: should we pass outBuffer to rangecoder?
148-
RangeCoder rangeCoder = new RangeCoder();
145+
final RangeCoder rangeCoder = new RangeCoder();
149146

150147
int last = 0;
151148
for (int i = 0; i < inSize; i++ ){
@@ -162,25 +159,24 @@ private ByteBuffer compressOrder1 (
162159

163160
private ByteBuffer compressRLEOrder0 (
164161
final ByteBuffer inBuffer,
165-
final RangeParams rangeParams,
166162
final ByteBuffer outBuffer) {
167163
int maxSymbols = 0;
168-
int inSize = inBuffer.remaining();
164+
final int inSize = inBuffer.remaining();
169165
for (int i = 0; i < inSize; i++) {
170166
if (maxSymbols < (inBuffer.get(i) & 0xFF)) {
171167
maxSymbols = inBuffer.get(i) & 0xFF;
172168
}
173169
}
174170
maxSymbols++; // FIXME not what spec states!
175171

176-
ByteModel modelLit = new ByteModel(maxSymbols);
172+
final ByteModel modelLit = new ByteModel(maxSymbols);
177173
final List<ByteModel> byteModelRunsList = new ArrayList(258);
178174

179175
for (int i=0; i <= 257; i++){
180176
byteModelRunsList.add(i,new ByteModel(4));
181177
}
182178
outBuffer.put((byte)maxSymbols);
183-
RangeCoder rangeCoder = new RangeCoder();
179+
final RangeCoder rangeCoder = new RangeCoder();
184180

185181

186182
int i = 0;
@@ -192,7 +188,6 @@ private ByteBuffer compressRLEOrder0 (
192188
}
193189
run--; // Check this!!
194190
int rctx = inBuffer.get(i) & 0xFF;
195-
int last = inBuffer.get(i) & 0xFF;
196191
i += run+1;
197192
int part = run >=3 ? 3 : run;
198193
byteModelRunsList.get(rctx).modelEncode(outBuffer, rangeCoder, part);
@@ -213,10 +208,9 @@ private ByteBuffer compressRLEOrder0 (
213208

214209
private ByteBuffer compressRLEOrder1 (
215210
final ByteBuffer inBuffer,
216-
final RangeParams rangeParams,
217211
final ByteBuffer outBuffer) {
218212
int maxSymbols = 0;
219-
int inSize = inBuffer.remaining();
213+
final int inSize = inBuffer.remaining();
220214
for (int i = 0; i < inSize; i++) {
221215
if (maxSymbols < (inBuffer.get(i) & 0xFF)) {
222216
maxSymbols = inBuffer.get(i) & 0xFF;
@@ -233,7 +227,7 @@ private ByteBuffer compressRLEOrder1 (
233227
byteModelRunsList.add(i,new ByteModel(4));
234228
}
235229
outBuffer.put((byte)maxSymbols);
236-
RangeCoder rangeCoder = new RangeCoder();
230+
final RangeCoder rangeCoder = new RangeCoder();
237231

238232

239233
int i = 0;
@@ -291,8 +285,7 @@ private ByteBuffer encodePack(
291285
} else if (numSymbols <= 2) {
292286

293287
// 1 bit per value
294-
int dataSize = (int) Math.ceil((double) inSize/8);
295-
data = ByteBuffer.allocate(dataSize);
288+
data = ByteBuffer.allocate((int) Math.ceil((double) inSize/8));
296289
int j = -1;
297290
for (int i = 0; i < inSize; i ++) {
298291
if (i % 8 == 0) {
@@ -303,8 +296,7 @@ private ByteBuffer encodePack(
303296
} else if (numSymbols <= 4) {
304297

305298
// 2 bits per value
306-
int dataSize = (int) Math.ceil((double) inSize/4);
307-
data = ByteBuffer.allocate(dataSize);
299+
data = ByteBuffer.allocate((int) Math.ceil((double) inSize/4));
308300
int j = -1;
309301
for (int i = 0; i < inSize; i ++) {
310302
if (i % 4 == 0) {
@@ -315,8 +307,7 @@ private ByteBuffer encodePack(
315307
} else {
316308

317309
// 4 bits per value
318-
int dataSize = (int) Math.ceil((double)inSize/2);
319-
data = ByteBuffer.allocate(dataSize);
310+
data = ByteBuffer.allocate((int) Math.ceil((double)inSize/2));
320311
int j = -1;
321312
for (int i = 0; i < inSize; i ++) {
322313
if (i % 2 == 0) {

0 commit comments

Comments
 (0)