|
| 1 | +#!/usr/bin/ruby |
| 2 | + |
| 3 | +# Author: Trizen |
| 4 | +# Date: 09 September 2023 |
| 5 | +# https://github.com/trizen |
| 6 | + |
| 7 | +# Implementation of Delta + Run-length + Elias coding, for encoding arbitrary integers. |
| 8 | + |
| 9 | +# References: |
| 10 | +# Data Compression (Summer 2023) - Lecture 5 - Basic Techniques |
| 11 | +# https://youtube.com/watch?v=TdFWb8mL5Gk |
| 12 | +# |
| 13 | +# Data Compression (Summer 2023) - Lecture 6 - Delta Compression and Prediction |
| 14 | +# https://youtube.com/watch?v=-3H_eDbWNEU |
| 15 | + |
# Return the next bit of the stream as a one-character string ('0' or '1').
#
#   fh        - input handle; one character is fetched from it with `getc`
#               each time the pending-bit buffer runs dry
#   bitstring - reference to the caller-owned buffer of pending bits
#               (may be undefined or empty on the first call)
#
# Dies with "error" when `fh.getc` yields nothing (end of input).
func read_bit (FileHandle fh, Ref bitstring) {

    var pending = (*bitstring \\ '')

    # Refill with the bits of the next byte. The 'b*' template is used so
    # that taking bits off the END of the string hands them out in the same
    # order in which pack('B*', ...) stored them.
    if (pending.is_empty) {
        pending = unpack('b*', fh.getc \\ die "error")
    }

    # Store back everything except the last character, which is the result.
    *bitstring = pending.substr(0, -1)
    return pending.substr(-1)
}
| 26 | + |
# Encode an array of integers with Delta + Run-length + Elias coding.
#
#   integers - the values to encode
#   double   - when true, the bit-length of each non-zero delta is itself
#              gamma-coded (as length+1) before the delta's bits;
#              when false, each non-zero delta is plain gamma-coded
#
# The sequence [0, integers.len, integers...] is turned into consecutive
# differences, so the first difference carries the number of integers.
# The differences are then run-length grouped, and every (delta, count)
# pair is written as:
#
#   delta == 0 : '0'
#   delta != 0 : '1', a sign bit ('0' = negative, '1' = positive),
#                then the magnitude in the selected Elias code
#   count      : unary bit-length prefix, '0', then the count's binary
#                digits without the leading 1 (a count of 1 is just '0')
#
# Returns the resulting bit string packed with pack('B*', ...).
func DRE_encode (Array integers, Bool double = false) {

    var runs = [0, integers.len, integers...].diffs.run_length
    var out  = FileHandle.new_buf(:raw)

    for delta,count in (runs) {

        if (delta == 0) {
            out << '0'
        }
        else {
            var sign = ((delta < 0) ? '0' : '1')
            var mag  = delta.abs.as_bin     # binary digits of |delta|
            var tail = mag.substr(1)        # same digits minus the implicit leading 1

            if (double) {
                # Gamma-code (bit-length + 1), then append the magnitude's tail.
                var width = mag.len.inc.as_bin
                out << ('1' + sign + ('1' * (width.len-1)) + '0' + width.substr(1) + tail)
            }
            else {
                # Plain gamma code of the magnitude.
                out << ('1' + sign + ('1' * (mag.len-1)) + '0' + tail)
            }
        }

        # Run length, gamma-coded; for count == 1 this collapses to a lone '0'.
        var reps = count.as_bin
        out << (('1' * (reps.len-1)) + '0' + reps.substr(1))
    }

    pack('B*', out.parent)
}
| 57 | + |
# Decode a stream produced by DRE_encode back into the array of integers.
#
#   fh     - input handle positioned at the start of the encoded data
#   double - must match the `double` flag that was used for encoding
#
# The stream is a list of (delta, run-length) pairs. The very first delta
# is the number of encoded integers; the remaining deltas are consecutive
# differences, starting from that length.
#
# Returns the decoded integers as an array. Dies (inside read_bit) when
# the input ends before all announced values have been read.
func DRE_decode (FileHandle fh, Bool double = false) {

    var deltas = []
    var buffer = ''
    var len = 0

    # Each pass decodes one (delta, run) pair. `k += run` together with the
    # loop's `++k` advances k by the number of deltas in that pair, so the
    # loop ends once len+1 deltas (length header + len values) were decoded.
    for (var k = 0 ; k <= len ; ++k) {

        # Only the first pair carries the length header.
        var is_header = (k == 0)

        var bit = read_bit(fh, \buffer)

        if (bit == '0') {
            deltas << 0
        }
        elsif (double) {
            var sign = read_bit(fh, \buffer)

            # Unary prefix, then the gamma-coded (bit-length + 1) of the magnitude.
            var ones = (^Inf -> first { read_bit(fh, \buffer) != '1' })
            var bits = Num('1' + ones.of { read_bit(fh, \buffer) }.join, 2)-1

            # The magnitude's leading 1 is implicit; read the other bits-1 digits.
            var int = Num('1' + (bits-1).of { read_bit(fh, \buffer) }.join, 2)

            deltas << (sign == '1' ? int : -int)
        }
        else {
            var sign = read_bit(fh, \buffer)

            # Plain gamma code: unary length, then the digits after the leading 1.
            var n = (^Inf -> first { read_bit(fh, \buffer) != '1' })
            var d = Num('1' + n.of { read_bit(fh, \buffer) }.join, 2)
            deltas << (sign == '1' ? d : -d)
        }

        # Run length: an empty unary prefix means the delta occurs once.
        var bl = (^Inf -> first { read_bit(fh, \buffer) != '1' })

        if (bl > 0) {
            var run = Num('1' + bl.of { read_bit(fh, \buffer) }.join, 2)-1
            k += run
            deltas << run.of(deltas[-1])...
        }

        # Take the length header from the FRONT of the list, regardless of
        # how far k was advanced by the run. Run-length coding merges the
        # header with the first data delta(s) when they are equal (e.g. the
        # input [2] yields the differences [1, 1]); in that case only the
        # first copy is the header and the rest must stay as data deltas.
        if (is_header) {
            len = deltas.shift
        }
    }

    # Prefix sums starting from the length reproduce the values;
    # the leading element (the length itself) is dropped.
    var acc = [len, deltas...].acc
    acc.shift
    return acc
}
| 103 | + |
# Demo: round-trip a small byte sequence through the gamma-coded variant
# (double = false), print both forms and verify the result.
var sample = %n[6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 1 1 3 3 1 2 3 0 0 1 2 4 2 1 0 1 2 1 1 0 0 1].pack('C*')

var packed   = DRE_encode(sample.bytes, false)
var restored = DRE_decode(packed.open_r(:raw), false)

say "Encoded: #{unpack('B*', packed)}"
say "Decoded: #{restored}"

assert_eq(restored.pack('C*'), sample)

# Self-test: round-trip this script's own bytes through the double-coded
# variant (double = true).
do {
    var source  = File(__FILE__).read(:raw)
    var packed  = DRE_encode(source.bytes, true)
    var decoded = DRE_decode(packed.open_r(:raw), true)
    assert_eq(source, decoded.pack('C*'))
}
| 120 | + |
| 121 | +__END__ |
| 122 | +Encoded: 111111110011001010111111001001101011010001111101111111101010101011000011100000101000110100101010001101001110001010001001001101001000001000001100 |
| 123 | +Decoded: [6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 1, 3, 3, 1, 2, 3, 0, 0, 1, 2, 4, 2, 1, 0, 1, 2, 1, 1, 0, 0, 1] |
0 commit comments