|
| 1 | +#!/usr/bin/ruby |
| 2 | + |
| 3 | +# Compress/decompress small files using Huffman coding. |
| 4 | + |
# Number of input bytes compressed per independent Huffman entry (1 MB).
define CHUNK_SIZE = 1024**2

# Magic bytes written at the start of every archive: "HFM" followed by
# the character with code 3.
define SIGNATURE = ('HFM' + 3.chr)
| 9 | + |
# Traverse the Huffman tree rooted at `n` and record the code of every leaf.
#
#   n - current node; a leaf stores its symbol under :a, an internal node
#       stores its two children under :0 and :1
#   s - bit-string accumulated on the path from the root down to `n`
#   h - output hash; receives one `symbol => code` entry per leaf
func walk(Hash n, String s, Hash h) {

    # Internal node: extend the path with '0' / '1' and descend.
    if (!n.has(:a)) {
        walk(n{:0}, s + '0', h)
        return walk(n{:1}, s + '1', h)
    }

    # Leaf: the accumulated path is this symbol's code.
    h{n{:a}} = s
    return nil
}
| 18 | + |
# Build a Huffman tree for the given byte values.
#
#   bytes - array of byte values to be encoded
#
# Returns the root node (a Hash). The root carries the combined frequency
# under :freq and, under :tree, a hash that maps each symbol to its
# bit-string code.
func make_tree(Array bytes) {

    # One leaf per distinct byte value: `a` is the byte, `freq` its count.
    var nodes = bytes.freq.sort.map_2d { |b,f|
        Hash(a => b, freq => f)
    }

    var n = Hash()

    # With fewer than two distinct symbols the merge loop below never runs,
    # leaving `n` empty and nothing for walk() to traverse. Handle these
    # inputs here instead of descending into children that do not exist.
    if (nodes.len < 2) {
        var tree = Hash()

        if (nodes.len == 1) {
            # A lone symbol still needs a non-empty code, otherwise the
            # encoded stream would be zero bits long; give it the code '0'.
            var leaf = nodes[0]
            n = Hash(:0 => leaf)
            n{:freq} = leaf{:freq}
            tree{leaf{:a}} = '0'
        }

        n{:tree} = tree
        return n
    }

    # Repeatedly merge the two least frequent nodes until only the root
    # remains (re-sorting on every pass stands in for a priority queue).
    while (nodes.sort_by!{|h| h{:freq} }.len > 1) {
        n = Hash(:0 => nodes.shift, :1 => nodes.shift)
        n{:freq} = (n{:0}{:freq} + n{:1}{:freq})
        nodes << n
    }

    # Derive the symbol => code table and attach it to the root.
    walk(n, '', n{:tree} = Hash())
    return n
}
| 35 | + |
# Encode a sequence of byte values as one bit-string.
#
#   bytes - array of byte values
#   t     - hash mapping each byte value to its bit-string code
#
# Returns the concatenation of the codes of all bytes, in order.
func huffman_encode(Array bytes, Hash t) {
    var codes = bytes.map {|byte| t{byte} }
    return codes.join
}
| 39 | + |
# Decode a bit-string back into the symbols it represents.
#
#   bits - string of '0' / '1' characters
#   tree - hash mapping each bit-string code to its decoded replacement
#
# Returns `bits` with every matched code replaced by its entry in `tree`.
func huffman_decode (String bits, Hash tree) {

    # Build one alternation of all known codes, shortest first.
    var alternatives = tree.keys.sort_by {|code| code.len }.join('|')
    var pattern = Regex('(' + alternatives + ')')

    bits.gsub(pattern, {|code| tree{code} })
}
| 43 | + |
# Compress one chunk of bytes and append it to `out_fh` as a single entry.
#
#   bytes  - array of byte values making up the chunk
#   out_fh - file handle the entry is written to
#
# Entry layout, in write order:
#   1. 256 characters, one per byte value 0..255, each holding the length
#      of that value's code (0 when the value has no code)
#   2. all codes concatenated in byte-value order, bit-packed ("B*")
#   3. the length of the encoded data in bits, packed with "N"
#   4. the encoded data itself, bit-packed ("B*")
func create_huffman_entry (Array bytes, FileHandle out_fh) {

    var code_of = make_tree(bytes){:tree}

    var lengths = ''
    var all_codes = ''

    for sym in (0..255) {
        # Byte values without a code contribute an empty code (length 0).
        var code = (code_of{sym} \\ '')
        lengths += code.len.chr
        all_codes += code
    }

    var bitstream = huffman_encode(bytes, code_of)

    out_fh.print(lengths)
    out_fh.print(pack("B*", all_codes))
    out_fh.print(pack("N", bitstream.len))
    out_fh.print(pack("B*", bitstream))
}
| 63 | + |
# Compress `input` into `output`.
#
# Writes SIGNATURE first, then one Huffman entry for every chunk of up to
# CHUNK_SIZE bytes read from the input. Dies if either file cannot be
# opened.
func huffman_compress_file(File input, File output) {

    var in_fh = input.open('<:raw') || die "Can't open file <<#{input}>> for reading"
    var out_fh = output.open('>:raw') || die "Can't open file <<#{output}>> for writing"

    # The archive starts with the signature
    out_fh.print(SIGNATURE)

    # Each chunk is compressed independently of the others
    while (in_fh.read(\var chunk, CHUNK_SIZE)) {
        var bytes = [unpack('C*', chunk)]
        create_huffman_entry(bytes, out_fh)
    }

    # Release both handles
    in_fh.close
    out_fh.close
}
| 83 | + |
# Read `bits_len` bits from `fh` and return them as a string of '0' / '1'.
#
#   fh       - file handle to read from
#   bits_len - number of bits wanted
#
# The result may be shorter than `bits_len` when the handle runs out of
# data before enough bits have been read.
func read_bits (FileHandle fh, Number bits_len) {

    # Fetch the whole bytes in one read...
    var raw = ''
    fh.read(\raw, bits_len>>3)
    var bits = unpack('B*', raw)

    # ...then top up one character at a time until enough bits are present
    # (or the input is exhausted).
    while (bits.len < bits_len) {
        bits += unpack('B*', fh.getc \\ break)
    }

    # The last byte may contribute more bits than requested; drop the excess.
    if (bits.len > bits_len) {
        bits.substr!(0, bits_len)
    }

    return bits
}
| 100 | + |
# Read one Huffman entry from `fh` and write its decoded data to `out_fh`.
#
#   fh     - file handle positioned at the start of an entry
#   out_fh - file handle receiving the decoded data
#
# Returns true when the entry carried encoded data (which has then been
# decoded and written), false when its encoded length is not positive.
func decode_huffman_entry (FileHandle fh, FileHandle out_fh) {

    # First 256 bytes: the code length of each byte value 0..255.
    fh.read(\var buffer, 256)

    var entries = []
    var total_bits = 0

    # Keep only the byte values that actually have a code.
    [unpack('C*', buffer)].map{ Num(_) }.each_kv {|sym,width|
        if (width > 0) {
            total_bits += width
            entries << [sym, width]
        }
    }

    # The codes follow as one packed bit-string, in byte-value order.
    var packed = read_bits(fh, total_bits)

    # Slice the codes off the front one by one to build the
    # code => character dictionary.
    var rev_dict = Hash()
    for sym,width in (entries) {
        var code = substr(packed, 0, width)
        packed.substr!(width)
        rev_dict{code} = sym.chr
    }

    # Next 4 bytes: length of the encoded data, in bits.
    var len_field = 4.of{ fh.getc }.join
    var enc_len = Num(unpack('N', len_field))

    if (enc_len > 0) {
        var payload = read_bits(fh, enc_len)
        var decoded = huffman_decode(payload, rev_dict)
        out_fh.print(decoded)
        return true
    }

    return false
}
| 134 | + |
# Decompress the archive `input` into `output`.
#
# Dies when the input cannot be opened, when it does not begin with
# SIGNATURE, or when the output cannot be opened. The output file is only
# opened after the signature has been verified.
func huffman_decompress_file(File input, File output) {

    var in_fh = input.open('<:raw') || die "Can't open file <<#{input}>> for reading"

    # Read as many characters as the signature is long and compare them.
    var magic = SIGNATURE.len.of { in_fh.getc }.join
    if (magic != SIGNATURE) {
        die "Not a HFM archive!\n"
    }

    var out_fh = output.open('>:raw') || die "Can't open file <<#{output}>> for writing"

    # Decode entries until the input ends or an entry reports no data.
    while (!in_fh.eof) {
        if (!decode_huffman_entry(in_fh, out_fh)) {
            break
        }
    }

    in_fh.close
    out_fh.close
}
| 153 | + |
# Command-line entry point.
#
#   -d    force decompression
#
# Without -d, the input is decompressed when its name ends in ".hfm.enc"
# and compressed otherwise. Results go to fixed file names in the current
# directory: "output.hfm.dec" or "output.hfm.enc".
ARGV.getopt!('d', \var decode)

# The first remaining argument is the input file; without one, print the
# usage line and exit with status 2.
var file = File(ARGV.shift) || do {
    say "usage: #{File(__MAIN__).basename} [-d] [input file]"
    Sys.exit(2)
}

var wants_decompression = (decode || file.match(/\.hfm\.enc\z/))

if (wants_decompression) {
    var target = File("output.hfm.dec")
    huffman_decompress_file(file, target)
}
else {
    var target = File("output.hfm.enc")
    huffman_compress_file(file, target)
}
0 commit comments