Skip to content

Commit f9b066c

Browse files
author
yash-puligundla
committed
Addressing feedback from nov 21 - part 1
1 parent 58ace69 commit f9b066c

File tree

5 files changed

+91
-95
lines changed

5 files changed

+91
-95
lines changed

src/main/java/htsjdk/samtools/cram/compression/range/ByteModel.java

+5-15
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ public class ByteModel {
88
// the cumulative frequencies of all symbols prior to this symbol,
99
// and the total of all frequencies.
1010
public int totalFrequency;
11-
public int maxSymbol;
11+
public final int maxSymbol;
1212
public final int[] symbols;
1313
public final int[] frequencies;
1414

@@ -24,17 +24,7 @@ public ByteModel(final int numSymbols) {
2424
}
2525
}
2626

27-
// TODO: use this method to reset
28-
public void reset() {
29-
totalFrequency = 0;
30-
for (int i = 0; i <= maxSymbol; i++) {
31-
symbols[i] = 0;
32-
frequencies[i] = 0;
33-
}
34-
// maxSymbol = 0; // TODO: ???
35-
}
36-
37-
public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){
27+
public int modelDecode(final ByteBuffer inBuffer, final RangeCoder rangeCoder){
3828

3929
// decodes one symbol
4030
final int freq = rangeCoder.rangeGetFrequency(totalFrequency);
@@ -45,7 +35,7 @@ public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){
4535
}
4636

4737
// update rangecoder
48-
rangeCoder.rangeDecode(inBuffer,cumulativeFrequency,frequencies[x],totalFrequency);
38+
rangeCoder.rangeDecode(inBuffer,cumulativeFrequency,frequencies[x]);
4939

5040
// update model frequencies
5141
frequencies[x] += Constants.STEP;
@@ -57,7 +47,7 @@ public int modelDecode(ByteBuffer inBuffer, RangeCoder rangeCoder){
5747
}
5848

5949
// keep symbols approximately frequency sorted
60-
int symbol = symbols[x];
50+
final int symbol = symbols[x];
6151
if (x > 0 && frequencies[x] > frequencies[x-1]){
6252
// Swap frequencies[x], frequencies[x-1]
6353
int tmp = frequencies[x];
@@ -81,7 +71,7 @@ public void modelRenormalize(){
8171
}
8272
}
8373

84-
public void modelEncode(final ByteBuffer outBuffer, RangeCoder rangeCoder, int symbol){
74+
public void modelEncode(final ByteBuffer outBuffer, final RangeCoder rangeCoder, final int symbol){
8575

8676
// encodes one input symbol
8777
int cumulativeFrequency = 0;

src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java

+16-12
Original file line numberDiff line numberDiff line change
@@ -21,35 +21,39 @@ protected RangeCoder() {
2121
this.cache = 0;
2222
}
2323

24-
protected void rangeDecodeStart(ByteBuffer inBuffer){
24+
protected void rangeDecodeStart(final ByteBuffer inBuffer){
2525
for (int i = 0; i < 5; i++){
2626

2727
// Get next 5 bytes. Ensure it is +ve
2828
code = (code << 8) + (inBuffer.get() & 0xFF);
2929
}
3030
}
3131

32-
protected void rangeDecode(ByteBuffer inBuffer, int sym_low, int sym_freq, int tot_freq){
33-
code -= sym_low * range;
34-
range *= sym_freq;
32+
protected void rangeDecode(final ByteBuffer inBuffer, final int cumulativeFrequency, final int symbolFrequency){
33+
code -= cumulativeFrequency * range;
34+
range *= symbolFrequency;
3535

3636
while (range < (1<<24)) {
3737
range <<= 8;
3838
code = (code << 8) + (inBuffer.get() & 0xFF); // Ensure code is positive
3939
}
4040
}
4141

42-
protected int rangeGetFrequency(final int tot_freq){
43-
range = (long) Math.floor(range / tot_freq);
42+
protected int rangeGetFrequency(final int totalFrequency){
43+
range = (long) Math.floor(range / totalFrequency);
4444
return (int) Math.floor(code / range);
4545
}
4646

47-
protected void rangeEncode(final ByteBuffer outBuffer, final int sym_low, final int sym_freq, final int tot_freq){
48-
long old_low = low;
49-
range = (long) Math.floor(range/tot_freq);
50-
low += sym_low * range;
47+
protected void rangeEncode(
48+
final ByteBuffer outBuffer,
49+
final int cumulativeFrequency,
50+
final int symbolFrequency,
51+
final int totalFrequency){
52+
final long old_low = low;
53+
range = (long) Math.floor(range/totalFrequency);
54+
low += cumulativeFrequency * range;
5155
low &= 0xFFFFFFFFL; // keep bottom 4 bytes, shift the top byte out of low
52-
range *= sym_freq;
56+
range *= symbolFrequency;
5357

5458
if (low < old_low) {
5559
carry = true;
@@ -70,7 +74,7 @@ protected void rangeEncodeEnd(final ByteBuffer outBuffer){
7074
}
7175
}
7276

73-
private void rangeShiftLow(ByteBuffer outBuffer) {
77+
private void rangeShiftLow(final ByteBuffer outBuffer) {
7478
// rangeShiftLow tracks the total number of extra bytes to emit and
7579
// carry indicates whether they are a string of 0xFF or 0x00 values
7680

src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java

+5-55
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import htsjdk.samtools.cram.CRAMException;
44
import htsjdk.samtools.cram.compression.BZIP2ExternalCompressor;
5-
import htsjdk.samtools.cram.compression.rans.Utils;
65

76
import java.nio.ByteBuffer;
87
import java.nio.ByteOrder;
@@ -39,16 +38,16 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
3938
// if pack, get pack metadata, which will be used later to decode packed data
4039
int packDataLength = 0;
4140
int numSymbols = 0;
42-
int[] packMappingTable = new int[0];
41+
byte[] packMappingTable = null;
4342
if (rangeParams.isPack()){
4443
packDataLength = outSize;
4544
numSymbols = inBuffer.get() & 0xFF;
4645

4746
// if (numSymbols > 16 or numSymbols==0), raise exception
4847
if (numSymbols <= 16 && numSymbols!=0) {
49-
packMappingTable = new int[numSymbols];
48+
packMappingTable = new byte[numSymbols];
5049
for (int i = 0; i < numSymbols; i++) {
51-
packMappingTable[i] = inBuffer.get() & 0xFF;
50+
packMappingTable[i] = inBuffer.get();
5251
}
5352
outSize = Utils.readUint7(inBuffer);
5453
} else {
@@ -92,8 +91,8 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, int outSize) {
9291
}
9392

9493
// if pack, then decodePack
95-
if (rangeParams.isPack() && packMappingTable.length > 0) {
96-
outBuffer = decodePack(outBuffer, packMappingTable, numSymbols, packDataLength);
94+
if (rangeParams.isPack()) {
95+
outBuffer = Utils.decodePack(outBuffer, packMappingTable, numSymbols, packDataLength);
9796
}
9897
outBuffer.rewind();
9998
return outBuffer;
@@ -227,55 +226,6 @@ private ByteBuffer uncompressEXT(
227226
return outBuffer;
228227
}
229228

230-
private ByteBuffer decodePack(ByteBuffer inBuffer, final int[] packMappingTable, int numSymbols, int uncompressedPackOutputLength) {
231-
ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength);
232-
int j = 0;
233-
234-
if (numSymbols <= 1) {
235-
for (int i=0; i < uncompressedPackOutputLength; i++){
236-
outBufferPack.put(i, (byte) packMappingTable[0]);
237-
}
238-
}
239-
240-
// 1 bit per value
241-
else if (numSymbols <= 2) {
242-
int v = 0;
243-
for (int i=0; i < uncompressedPackOutputLength; i++){
244-
if (i % 8 == 0){
245-
v = inBuffer.get(j++);
246-
}
247-
outBufferPack.put(i, (byte) packMappingTable[v & 1]);
248-
v >>=1;
249-
}
250-
}
251-
252-
// 2 bits per value
253-
else if (numSymbols <= 4){
254-
int v = 0;
255-
for(int i=0; i < uncompressedPackOutputLength; i++){
256-
if (i % 4 == 0){
257-
v = inBuffer.get(j++);
258-
}
259-
outBufferPack.put(i, (byte) packMappingTable[v & 3]);
260-
v >>=2;
261-
}
262-
}
263-
264-
// 4 bits per value
265-
else if (numSymbols <= 16){
266-
int v = 0;
267-
for(int i=0; i < uncompressedPackOutputLength; i++){
268-
if (i % 2 == 0){
269-
v = inBuffer.get(j++);
270-
}
271-
outBufferPack.put(i, (byte) packMappingTable[v & 15]);
272-
v >>=4;
273-
}
274-
}
275-
inBuffer = outBufferPack;
276-
return inBuffer;
277-
}
278-
279229
private ByteBuffer decodeStripe(ByteBuffer inBuffer, final int outSize){
280230

281231
final int numInterleaveStreams = inBuffer.get() & 0xFF;

src/main/java/htsjdk/samtools/cram/compression/range/Utils.java

+52
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,56 @@ public static int readUint7(ByteBuffer cp) {
2828
} while ((c & 0x80) != 0);
2929
return i;
3030
}
31+
32+
public static ByteBuffer decodePack(
33+
final ByteBuffer inBuffer,
34+
final byte[] packMappingTable,
35+
final int numSymbols,
36+
final int uncompressedPackOutputLength) {
37+
ByteBuffer outBufferPack = ByteBuffer.allocate(uncompressedPackOutputLength);
38+
int j = 0;
39+
40+
if (numSymbols <= 1) {
41+
for (int i=0; i < uncompressedPackOutputLength; i++){
42+
outBufferPack.put(i, packMappingTable[0]);
43+
}
44+
}
45+
46+
// 1 bit per value
47+
else if (numSymbols <= 2) {
48+
int v = 0;
49+
for (int i=0; i < uncompressedPackOutputLength; i++){
50+
if (i % 8 == 0){
51+
v = inBuffer.get(j++);
52+
}
53+
outBufferPack.put(i, packMappingTable[v & 1]);
54+
v >>=1;
55+
}
56+
}
57+
58+
// 2 bits per value
59+
else if (numSymbols <= 4){
60+
int v = 0;
61+
for(int i=0; i < uncompressedPackOutputLength; i++){
62+
if (i % 4 == 0){
63+
v = inBuffer.get(j++);
64+
}
65+
outBufferPack.put(i, packMappingTable[v & 3]);
66+
v >>=2;
67+
}
68+
}
69+
70+
// 4 bits per value
71+
else if (numSymbols <= 16){
72+
int v = 0;
73+
for(int i=0; i < uncompressedPackOutputLength; i++){
74+
if (i % 2 == 0){
75+
v = inBuffer.get(j++);
76+
}
77+
outBufferPack.put(i, packMappingTable[v & 15]);
78+
v >>=4;
79+
}
80+
}
81+
return outBufferPack;
82+
}
3183
}

src/test/java/htsjdk/samtools/cram/RangeInteropTest.java

+13-13
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,6 @@
1919
import java.util.ArrayList;
2020
import java.util.List;
2121

22-
import static htsjdk.samtools.cram.CRAMInteropTestUtils.filterEmbeddedNewlines;
23-
import static htsjdk.samtools.cram.CRAMInteropTestUtils.getInteropCompressedFilePaths;
24-
import static htsjdk.samtools.cram.CRAMInteropTestUtils.getParamsFormatFlags;
25-
import static htsjdk.samtools.cram.CRAMInteropTestUtils.getUnCompressedFilePath;
26-
2722
public class RangeInteropTest extends HtsjdkTest {
2823
public static final String COMPRESSED_RANGE_DIR = "arith";
2924

@@ -34,13 +29,13 @@ public Object[][] getRoundTripTestCases() throws IOException {
3429
// compressed testfile path, uncompressed testfile path,
3530
// Range encoder, Range decoder, Range params
3631
final List<Object[]> testCases = new ArrayList<>();
37-
for (Path path : getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) {
32+
for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) {
3833
Object[] objects = new Object[]{
3934
path,
40-
getUnCompressedFilePath(path),
35+
CRAMInteropTestUtils.getUnCompressedFilePath(path),
4136
new RangeEncode(),
4237
new RangeDecode(),
43-
new RangeParams(getParamsFormatFlags(path))
38+
new RangeParams(CRAMInteropTestUtils.getParamsFormatFlags(path))
4439
};
4540
testCases.add(objects);
4641
}
@@ -70,7 +65,7 @@ public void testRangeRoundTrip(
7065
// preprocess the uncompressed data (to match what the htscodecs-library test harness does)
7166
// by filtering out the embedded newlines, and then round trip through Range codec and compare the
7267
// results
73-
final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)));
68+
final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)));
7469

7570
if (params.isStripe()) {
7671
Assert.assertThrows(CRAMException.class, () -> rangeEncode.compress(uncompressedInteropBytes, params));
@@ -95,11 +90,16 @@ public void testDecodeOnly(
9590
try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath);
9691
final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath)
9792
) {
98-
9993
// preprocess the uncompressed data (to match what the htscodecs-library test harness does)
100-
// by filtering out the embedded newlines, and then round trip through Range codec and compare the
101-
// results
102-
final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)));
94+
// by filtering out the embedded newlines, and then round trip through Range codec
95+
// and compare the results
96+
97+
final ByteBuffer uncompressedInteropBytes;
98+
if (uncompressedInteropPath.toString().contains("htscodecs/tests/dat/u")) {
99+
uncompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(uncompressedInteropStream));
100+
} else {
101+
uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)));
102+
}
103103
final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream));
104104

105105
// Use htsjdk to uncompress the precompressed file from htscodecs repo

0 commit comments

Comments
 (0)