-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathLibDeflate.lua
3605 lines (3303 loc) · 127 KB
/
LibDeflate.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
--[[--
LibDeflate 1.0.2-release <br>
Pure Lua compressor and decompressor with high compression ratio using
DEFLATE/zlib format.
@file LibDeflate.lua
@author Haoqian He (Github: SafeteeWoW; World of Warcraft: Safetyy-Illidan(US))
@copyright LibDeflate <2018-2021> Haoqian He
@license zlib License
This library is implemented according to the following specifications. <br>
Report a bug if LibDeflate is not fully compliant with those specs. <br>
Both compressors and decompressors have been implemented in the library.<br>
1. RFC1951: DEFLATE Compressed Data Format Specification version 1.3 <br>
https://tools.ietf.org/html/rfc1951 <br>
2. RFC1950: ZLIB Compressed Data Format Specification version 3.3 <br>
https://tools.ietf.org/html/rfc1950 <br>
This library requires Lua 5.1/5.2/5.3/5.4 interpreter or LuaJIT v2.0+. <br>
This library does not have any dependencies. <br>
<br>
This file "LibDeflate.lua" is the only source file of
the library. <br>
Submit suggestions or report bugs to
https://github.com/safeteeWow/LibDeflate/issues
]] --[[
zlib License
(C) 2018-2021 Haoqian He
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
License History:
1. GNU General Public License Version 3 in v1.0.0 and earlier versions.
2. GNU Lesser General Public License Version 3 in v1.0.1
3. the zlib License since v1.0.2
Credits and Disclaimer:
This library rewrites the code from the algorithm
and the ideas of the following projects,
and uses their code to help to test the correctness of this library,
but their code is not included directly in the library itself.
Their original licenses shall be complied with when used.
1. zlib, by Jean-loup Gailly (compression) and Mark Adler (decompression).
http://www.zlib.net/
Licensed under zlib License. http://www.zlib.net/zlib_license.html
For the compression algorithm.
2. puff, by Mark Adler. https://github.com/madler/zlib/tree/master/contrib/puff
Licensed under zlib License. http://www.zlib.net/zlib_license.html
For the decompression algorithm.
3. LibCompress, by jjsheets and Galmok of European Stormrage (Horde)
https://www.wowace.com/projects/libcompress
Licensed under GPLv2.
https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
For the code to create customized codec.
4. WeakAuras2,
https://github.com/WeakAuras/WeakAuras2
Licensed under GPLv2.
For the 6bit encoding and decoding.
]] --[[
Curseforge auto-packaging replacements:
Project Date: @project-date-iso@
Project Hash: @project-hash@
Project Version: @project-version@
--]] local LibDeflate
do
  -- Release bookkeeping. Three places must be kept in sync for a release:
  --   1. the version string in the header comment at the top of this file,
  --   2. _VERSION,
  --   3. _MINOR.

  -- Official semantic version of LibDeflate, all lowercase.
  -- The suffix can be alpha1, alpha2, beta1, beta2, rc1, rc2, etc.
  local _VERSION = "1.0.2-release"

  -- LibStub major name. If the major version ever changes, the library
  -- should be published under a new name such as "LibDeflate2".
  local _MAJOR = "LibDeflate"

  -- LibStub minor version; bump it for every new release.
  --   0 : v0.x
  --   1 : v1.0.0
  --   2 : v1.0.1
  --   3 : v1.0.2
  local _MINOR = 3

  local _COPYRIGHT = ("LibDeflate %s Copyright (C) 2018-2021 Haoqian He." ..
                       " Licensed under the zlib License"):format(_VERSION)

  if not LibStub then
    -- Stand-alone use: the World of Warcraft library "LibStub" is absent.
    LibDeflate = {}
  else
    local lib, minor = LibStub:GetLibrary(_MAJOR, true)
    if lib and minor and minor >= _MINOR then
      -- An equal or newer copy is already registered; hand it back and stop
      -- loading this file.
      return lib
    end
    -- First registration, or upgrade of an older copy.
    -- NOTE: A new version must implement every API and table exported by
    -- the old versions, so the old library can be fully garbage collected
    -- and backward compatibility is guaranteed.
    LibDeflate = LibStub:NewLibrary(_MAJOR, _MINOR)
  end

  LibDeflate._VERSION = _VERSION
  LibDeflate._MAJOR = _MAJOR
  LibDeflate._MINOR = _MINOR
  LibDeflate._COPYRIGHT = _COPYRIGHT
end
-- localize Lua api for faster access.
local assert = assert
local error = error
local pairs = pairs
local string_byte = string.byte
local string_char = string.char
local string_find = string.find
local string_gsub = string.gsub
local string_sub = string.sub
local table_concat = table.concat
local table_sort = table.sort
local tostring = tostring
local type = type
-- Lookup tables declared empty here and filled in by the initialisation
-- code that follows the declarations.

-- Converts i to 2^i, (0<=i<=32)
-- This is used to implement bit left shift and bit right shift.
-- "x >> y" in C: "(x-x%_pow2[y])/_pow2[y]" in Lua
-- "x << y" in C: "x*_pow2[y]" in Lua
local _pow2 = {}
-- Converts any byte to a one-character string, (0<=byte<=255)
local _byte_to_char = {}
-- _reverse_bits_tbl[len][val] stores the bit reverse of
-- the number with bit length "len" and value "val".
-- For example, decimal number 6 with bit length 5 is binary 00110.
-- Its reverse is binary 01100,
-- which is decimal 12 and 12 == _reverse_bits_tbl[5][6]
-- 1<=len<=9, 0<=val<=2^len-1
-- (The initialisation loop fills entries up to val == 2^(len+1)-1; for
-- those larger values only the low "len" bits are reversed.)
-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
-- of a huffman alphabet is 9?
local _reverse_bits_tbl = {}
-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal/LZ77_length code (257<=code<=285)
local _length_to_deflate_code = {}
-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal/LZ77_length code extra bits.
local _length_to_deflate_extra_bits = {}
-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal/LZ77_length code extra bit length.
local _length_to_deflate_extra_bitlen = {}
-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
local _dist256_to_deflate_code = {}
-- Convert a small LZ77 distance (1<=dist<=256) to
-- a deflate distance code extra bits.
local _dist256_to_deflate_extra_bits = {}
-- Convert a small LZ77 distance (1<=dist<=256) to
-- a deflate distance code extra bit length.
local _dist256_to_deflate_extra_bitlen = {}
-- Base LZ77 length of every literal/LZ77_length deflate code.
-- The table key is (code - 256), 257<=code<=285.
local _literal_deflate_code_to_base_len = {
  3, 4, 5, 6, 7, 8, 9, 10, -- codes 257..264
  11, 13, 15, 17, -- codes 265..268
  19, 23, 27, 31, -- codes 269..272
  35, 43, 51, 59, -- codes 273..276
  67, 83, 99, 115, -- codes 277..280
  131, 163, 195, 227, -- codes 281..284
  258 -- code 285
}

-- Number of extra bits that follow every literal/LZ77_length deflate code.
-- The table key is (code - 256), 257<=code<=285.
local _literal_deflate_code_to_extra_bitlen = {
  0, 0, 0, 0, 0, 0, 0, 0, -- codes 257..264
  1, 1, 1, 1, -- codes 265..268
  2, 2, 2, 2, -- codes 269..272
  3, 3, 3, 3, -- codes 273..276
  4, 4, 4, 4, -- codes 277..280
  5, 5, 5, 5, -- codes 281..284
  0 -- code 285
}

-- Base LZ77 distance of every distance deflate code. (0<=code<=29)
local _dist_deflate_code_to_base_dist = {
  [0] = 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, -- codes 0..9
  33, 49, 65, 97, 129, 193, 257, 385, 513, 769, -- codes 10..19
  1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 -- 20..29
}

-- Number of extra bits that follow every distance deflate code.
-- (0<=code<=29)
local _dist_deflate_code_to_extra_bitlen = {
  [0] = 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, -- codes 0..9
  4, 4, 5, 5, 6, 6, 7, 7, 8, 8, -- codes 10..19
  9, 9, 10, 10, 11, 11, 12, 12, 13, 13 -- codes 20..29
}

-- The code order of the first huffman header in the dynamic deflate block.
-- See the page 12 of RFC1951
local _rle_codes_huffman_bitlen_order = {
  16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
}
-- The following tables are used by the fixed deflate block.
-- They are only declared here; their values are assigned at the bottom of
-- the source.
-- The huffman code of the literal/LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_code
-- Convert huffman code of the literal/LZ77_length to deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_to_deflate_code
-- The bit length of the huffman code of literal/LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_bitlen
-- The count of each bit length of the literal/LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_bitlen_count
-- The huffman code of the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_code
-- Convert huffman code of the distance to deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_to_deflate_code
-- The bit length of the huffman code of the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_bitlen
-- The count of each bit length of the huffman code of
-- the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_bitlen_count
-- _byte_to_char[byte]: the one-character string for each byte value.
for byte = 0, 255 do
  _byte_to_char[byte] = string_char(byte)
end

-- _pow2[i] = 2^i for 0<=i<=32, built by repeated doubling.
_pow2[0] = 1
for exponent = 1, 32 do
  _pow2[exponent] = _pow2[exponent - 1] * 2
end

-- _reverse_bits_tbl[len][val]: the low "len" bits of "val" in reverse order.
-- Every row is filled for 0<=val<=2^(len+1)-1.
for len = 1, 9 do
  local row = {}
  _reverse_bits_tbl[len] = row
  for val = 0, _pow2[len + 1] - 1 do
    local reversed = 0
    local remaining = val
    for _ = 1, len do
      -- Append the lowest bit of "remaining", then make room for the next
      -- bit. This is "reversed = (reversed | (remaining & 1)) << 1" in C.
      reversed = (reversed + remaining % 2) * 2
      remaining = (remaining - remaining % 2) / 2
    end
    -- Undo the one surplus left shift made by the last iteration.
    row[val] = reversed / 2
  end
end
-- Fill the LZ77 length -> deflate code / extra bit length / extra bits
-- tables for 3<=len<=258.
-- The source code is written according to the pattern in the numbers
-- in RFC1951 Page10.
do
-- State of the current group of four consecutive codes that share the same
-- extra bit length:
-- a: the largest length covered by the group (18 for lengths 11..18),
-- b: twice the number of lengths in the group, so b / 2 lengths per group
--    and b / 8 lengths per code,
-- c: the first deflate code of the group,
-- bitlen: the extra bit length of every code in the group.
local a = 18
local b = 16
local c = 265
local bitlen = 1
for len = 3, 258 do
if len <= 10 then
-- Lengths 3..10 map one-to-one to codes 257..264 without extra bits.
-- No entry is written to _length_to_deflate_extra_bits for them.
_length_to_deflate_code[len] = len + 254
_length_to_deflate_extra_bitlen[len] = 0
elseif len == 258 then
-- Length 258 has its own code 285 without extra bits.
-- No entry is written to _length_to_deflate_extra_bits for it.
_length_to_deflate_code[len] = 285
_length_to_deflate_extra_bitlen[len] = 0
else
if len > a then
-- "len" is past the current group: advance to the next group, which
-- covers twice as many lengths, starts four codes later and uses one
-- more extra bit.
a = a + b
b = b * 2
c = c + 4
bitlen = bitlen + 1
end
-- Zero-based offset of "len" inside its group
-- (the group starts at length a - b / 2 + 1).
local t = len - a - 1 + b / 2
-- floor(t / (b / 8)) selects the code within the group;
-- the remainder is the value of the extra bits.
_length_to_deflate_code[len] = (t - (t % (b / 8))) / (b / 8) + c
_length_to_deflate_extra_bitlen[len] = bitlen
_length_to_deflate_extra_bits[len] = t % (b / 8)
end
end
end
-- Fill the LZ77 distance -> deflate code / extra bit length / extra bits
-- tables for small distances, 1<=dist<=256.
-- The source code is written according to the pattern in the numbers
-- in RFC1951 Page11.
do
-- Distances 1 and 2 are codes 0 and 1 without extra bits.
_dist256_to_deflate_code[1] = 0
_dist256_to_deflate_code[2] = 1
_dist256_to_deflate_extra_bitlen[1] = 0
_dist256_to_deflate_extra_bitlen[2] = 0
-- State of the current pair of consecutive codes that share the same
-- extra bit length:
-- a: the largest distance covered by the first code of the pair,
-- b: the largest distance covered by the pair,
-- code: the first deflate code of the pair,
-- bitlen: the extra bit length of both codes in the pair.
local a = 3
local b = 4
local code = 2
local bitlen = 0
for dist = 3, 256 do
if dist > b then
-- "dist" is past the current pair: advance to the next pair, which
-- covers twice as many distances and uses one more extra bit.
a = a * 2
b = b * 2
code = code + 2
bitlen = bitlen + 1
end
_dist256_to_deflate_code[dist] = (dist <= a) and code or (code + 1)
-- NOTE: "bitlen" starts at 0 and is only ever incremented in this loop,
-- so the "bitlen < 0" guard below never fires.
_dist256_to_deflate_extra_bitlen[dist] = (bitlen < 0) and 0 or bitlen
-- b >= 8 holds from dist == 5 onwards. Distances 1..4 have no extra bits
-- and get no entry in _dist256_to_deflate_extra_bits.
if b >= 8 then
-- (dist - b / 2 - 1) is the zero-based offset of "dist" inside the pair,
-- and b / 4 is the number of distances covered by one code.
_dist256_to_deflate_extra_bits[dist] = (dist - b / 2 - 1) % (b / 4)
end
end
end
--- Calculate the Adler-32 checksum of the string. <br>
-- See RFC1950 Page 9 https://tools.ietf.org/html/rfc1950 for the
-- definition of Adler-32 checksum.
-- @param str [string] the input string to calculate its Adler-32 checksum.
-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
-- and less than 2^32 (4294967296).
-- @raise error if 'str' is not a string.
function LibDeflate:Adler32(str)
  if type(str) ~= "string" then
    error(("Usage: LibDeflate:Adler32(str):" ..
            " 'str' - string expected got '%s'."):format(type(str)), 2)
  end

  -- Reference (not unrolled) form of the algorithm:
  --
  --   local a = 1
  --   local b = 0
  --   for i=1, #str do
  --     local s = string.byte(str, i, i)
  --     a = (a+s)%65521
  --     b = (b+a)%65521
  --   end
  --   return b*65536+a
  --
  -- Below, the loop is unrolled to 16 bytes per iteration for speed.
  local MODULUS = 65521
  local length = #str
  local a = 1
  local b = 0
  -- Index of the first byte that is not part of a complete 16-byte block.
  local tail_start = length - length % 16 + 1

  for pos = 1, tail_start - 1, 16 do
    local x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15,
          x16 = string_byte(str, pos, pos + 15)
    -- After 16 single-byte steps "a" has been added to "b" 16 times,
    -- x1 16 times, x2 15 times, ... and x16 once.
    b = (b + 16 * a + 16 * x1 + 15 * x2 + 14 * x3 + 13 * x4 + 12 * x5 +
          11 * x6 + 10 * x7 + 9 * x8 + 8 * x9 + 7 * x10 + 6 * x11 +
          5 * x12 + 4 * x13 + 3 * x14 + 2 * x15 + x16) % MODULUS
    a = (a + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 +
          x13 + x14 + x15 + x16) % MODULUS
  end

  -- The remaining (at most 15) bytes are processed one at a time.
  for pos = tail_start, length do
    a = (a + string_byte(str, pos, pos)) % MODULUS
    b = (b + a) % MODULUS
  end

  return (b * 65536 + a) % 4294967296
end
-- Compare two Adler-32 checksums.
-- Both values are reduced modulo 2^32 before the comparison, so a checksum
-- stored as a signed 32-bit number equals its unsigned form:
-- 4072834167 (unsigned) is the same adler32 as -222133129
-- @param actual The first checksum.
-- @param expected The second checksum.
-- @return true if both checksums are equal modulo 2^32, false otherwise.
local function IsEqualAdler32(actual, expected)
  local TWO_POW_32 = 4294967296
  local actual_unsigned = actual % TWO_POW_32
  local expected_unsigned = expected % TWO_POW_32
  return actual_unsigned == expected_unsigned
end
--- Create a preset dictionary.
--
-- This function is not fast, and the memory consumption of the produced
-- dictionary is about 50 times of the input string. Therefore, it is suggested
-- to run this function only once in your program.
--
-- It is very important to know that if you do use a preset dictionary,
-- compressors and decompressors MUST USE THE SAME dictionary. That is,
-- dictionary must be created using the same string. If you update your program
-- with a new dictionary, people with the old version won't be able to transmit
-- data with people with the new version. Therefore, changing the dictionary
-- must be done very carefully.
--
-- The parameters "strlen" and "adler32" add a layer of verification to ensure
-- the parameter "str" is not modified unintentionally during the program
-- development.
--
-- @usage local dict_str = "1234567890"
--
-- -- print(dict_str:len(), LibDeflate:Adler32(dict_str))
-- -- Hardcode the print result below to verify it to avoid accidental
-- -- modification of 'str' during the program development.
-- -- string length: 10, Adler-32: 187433486,
-- -- Don't calculate string length and its Adler-32 at run-time.
--
-- local dict = LibDeflate:CreateDictionary(dict_str, 10, 187433486)
--
-- @param str [string] The string used as the preset dictionary. <br>
-- You should put stuffs that frequently appears in the dictionary
-- string and preferably put more frequently appeared stuffs toward the end
-- of the string. <br>
-- Empty string and string longer than 32768 bytes are not allowed.
-- @param strlen [integer] The length of 'str'. Please pass in this parameter
-- as a hardcoded constant, in order to verify the content of 'str'. The value
-- of this parameter should be known before your program runs.
-- @param adler32 [integer] The Adler-32 checksum of 'str'. Please pass in this
-- parameter as a hardcoded constant, in order to verify the content of 'str'.
-- The value of this parameter should be known before your program runs.
-- @return [table] The dictionary used for preset dictionary compression and
-- decompression.
-- @raise error if an argument has the wrong type, if 'str' is empty or longer
-- than 32768 bytes, if 'strlen' does not match the length of 'str',
-- or if 'adler32' does not match the Adler-32 checksum of 'str'.
-- Every error is raised with level 2, i.e. it is reported at the position of
-- the caller of this function.
function LibDeflate:CreateDictionary(str, strlen, adler32)
  if type(str) ~= "string" then
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'str' - string expected got '%s'."):format(type(str)), 2)
  end
  if type(strlen) ~= "number" then
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'strlen' - number expected got '%s'."):format(type(strlen)), 2)
  end
  if type(adler32) ~= "number" then
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'adler32' - number expected got '%s'."):format(type(adler32)), 2)
  end
  if strlen ~= #str then
    -- Level 2, like every other argument check in this function, so the
    -- error points at the caller that passed the inconsistent arguments.
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'strlen' does not match the actual length of 'str'." ..
            " 'strlen': %u, '#str': %u ." ..
            " Please check if 'str' is modified unintentionally."):format(
            strlen, #str), 2)
  end
  if strlen == 0 then
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'str' - Empty string is not allowed."), 2)
  end
  if strlen > 32768 then
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'str' - string longer than 32768 bytes is not allowed." ..
            " Got %d bytes."):format(strlen), 2)
  end
  local actual_adler32 = self:Adler32(str)
  if not IsEqualAdler32(adler32, actual_adler32) then
    -- Level 2 for the same reason as the 'strlen' check above.
    error(("Usage: LibDeflate:CreateDictionary(str, strlen, adler32):" ..
            " 'adler32' does not match the actual adler32 of 'str'." ..
            " 'adler32': %u, 'Adler32(str)': %u ." ..
            " Please check if 'str' is modified unintentionally."):format(
            adler32, actual_adler32), 2)
  end

  -- Layout of the dictionary:
  --   adler32      the checksum as passed in by the caller,
  --   strlen       the length of 'str',
  --   string_table string_table[i] is the byte value of str:sub(i, i),
  --   hash_tables  hash_tables[hash] is the list of offsets (i - strlen) of
  --                every position i whose 3 bytes str:sub(i, i + 2) hash to
  --                "hash". The offsets are negative, i.e. relative to the
  --                end of the dictionary string.
  local dictionary = {}
  dictionary.adler32 = adler32
  dictionary.hash_tables = {}
  dictionary.string_table = {}
  dictionary.strlen = strlen
  local string_table = dictionary.string_table
  local hash_tables = dictionary.hash_tables
  string_table[1] = string_byte(str, 1, 1)
  string_table[2] = string_byte(str, 2, 2)
  if strlen >= 3 then
    local i = 1
    -- Rolling hash: after "(hash * 256 + byte) % 16777216" the hash holds
    -- the three most recent bytes as a 24-bit number.
    local hash = string_table[1] * 256 + string_table[2]
    -- Main loop, unrolled to 4 positions per iteration for speed.
    while i <= strlen - 2 - 3 do
      local x1, x2, x3, x4 = string_byte(str, i + 2, i + 5)
      string_table[i + 2] = x1
      string_table[i + 3] = x2
      string_table[i + 4] = x3
      string_table[i + 5] = x4

      hash = (hash * 256 + x1) % 16777216
      local t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = i - strlen
      i = i + 1

      hash = (hash * 256 + x2) % 16777216
      t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = i - strlen
      i = i + 1

      hash = (hash * 256 + x3) % 16777216
      t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = i - strlen
      i = i + 1

      hash = (hash * 256 + x4) % 16777216
      t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = i - strlen
      i = i + 1
    end
    -- The remaining (at most 3) positions are handled one at a time.
    while i <= strlen - 2 do
      local x = string_byte(str, i + 2)
      string_table[i + 2] = x
      hash = (hash * 256 + x) % 16777216
      local t = hash_tables[hash]
      if not t then
        t = {}
        hash_tables[hash] = t
      end
      t[#t + 1] = i - strlen
      i = i + 1
    end
  end
  return dictionary
end
-- Check the shape of a preset dictionary.
-- Only the types and sizes of its fields are inspected, not their content.
-- @param dictionary The preset dictionary for compression and decompression.
-- @return true if valid, false if not valid.
-- @return the error message if not valid, the empty string otherwise.
local function IsValidDictionary(dictionary)
  if type(dictionary) ~= "table" then
    return false,
           ("'dictionary' - table expected got '%s'."):format(type(dictionary))
  end

  -- The fields are checked in this order on purpose: the length operator is
  -- only applied to "string_table" after it is known to be a table, and
  -- "strlen" is only compared after it is known to be a number.
  local well_formed = type(dictionary.adler32) == "number"
                        and type(dictionary.string_table) == "table"
                        and type(dictionary.strlen) == "number"
                        and dictionary.strlen > 0
                        and dictionary.strlen <= 32768
                        and dictionary.strlen == #dictionary.string_table
                        and type(dictionary.hash_tables) == "table"
  if well_formed then
    return true, ""
  end
  return false, "'dictionary' - corrupted dictionary."
end
--[[
The key of the configuration table is the compression level,
and its value stores the compression setting.
These numbers come from zlib source code.
Higher compression level usually means better compression.
(Because LibDeflate uses a simplified version of zlib algorithm,
there is no guarantee that higher compression level does not create
bigger file than lower level, but I can say it's 99% likely)
Be careful with the high compression level. This is a pure Lua
implementation compressor/decompressor, which is significantly slower than
a C/C++ equivalent compressor/decompressor. Very high compression level
costs significantly more CPU time, and usually the compressed size won't be
significantly smaller when you increase compression level by 1, when the
level is already very high. Benchmark yourself if you can afford it.
See also https://github.com/madler/zlib/blob/master/doc/algorithm.txt,
https://github.com/madler/zlib/blob/master/deflate.c for more information.
The meaning of each field:
@field 1 use_lazy_evaluation:
true/false. Whether the program uses lazy evaluation.
See what is "lazy evaluation" in the link above.
lazy_evaluation improves ratio, but is relatively slow.
@field 2 good_prev_length:
Only effective if lazy is set (nil otherwise). Only use 1/4 of max_chain,
if prev length of lazy match is above this.
@field 3 max_insert_length/max_lazy_match:
If not using lazy evaluation,
insert new strings in the hash table only if the match length is not
greater than this length.
If using lazy evaluation, only continue lazy evaluation,
if previous match length is strictly smaller than this value.
@field 4 nice_length:
Number. Don't continue to go down the hash chain,
if match length is above this.
@field 5 max_chain:
Number. The maximum number of hash chains we look.
--]]
local _compression_level_configs = {
[0] = {false, nil, 0, 0, 0}, -- level 0, no compression
[1] = {false, nil, 4, 8, 4}, -- level 1, similar to zlib level 1
[2] = {false, nil, 5, 18, 8}, -- level 2, similar to zlib level 2
[3] = {false, nil, 6, 32, 32}, -- level 3, similar to zlib level 3
[4] = {true, 4, 4, 16, 16}, -- level 4, similar to zlib level 4
[5] = {true, 8, 16, 32, 32}, -- level 5, similar to zlib level 5
[6] = {true, 8, 16, 128, 128}, -- level 6, similar to zlib level 6
[7] = {true, 8, 32, 128, 256}, -- (SLOW) level 7, similar to zlib level 7
[8] = {true, 32, 128, 258, 1024}, -- (SLOW) level 8, similar to zlib level 8
[9] = {true, 32, 258, 258, 4096}
-- (VERY SLOW) level 9, similar to zlib level 9
}
-- Check if the compression/decompression arguments are valid.
-- This function never raises; every problem is reported through its
-- return values.
-- @param str The input string.
-- @param check_dictionary if true, check if dictionary is valid.
-- @param dictionary The preset dictionary for compression and decompression.
-- @param check_configs if true, check if config is valid.
-- @param configs The compression configuration table: nil, or a table whose
-- only allowed keys are "level" (a key of _compression_level_configs) and
-- "strategy" ("fixed", "huffman_only" or "dynamic").
-- @return true if valid, false if not valid.
-- @return the error message if not valid, the empty string otherwise.
local function IsValidArguments(str, check_dictionary, dictionary,
                                check_configs, configs)
  if type(str) ~= "string" then
    return false, ("'str' - string expected got '%s'."):format(type(str))
  end
  if check_dictionary then
    local dict_valid, dict_err = IsValidDictionary(dictionary)
    if not dict_valid then return false, dict_err end
  end
  if check_configs then
    local type_configs = type(configs)
    if type_configs ~= "nil" and type_configs ~= "table" then
      return false, ("'configs' - nil or table expected got '%s'."):format(
               type(configs))
    end
    if type_configs == "table" then
      for k, v in pairs(configs) do
        if k ~= "level" and k ~= "strategy" then
          -- The key may be of any type (boolean, table, ...). On Lua 5.1
          -- "%s" only accepts strings and numbers, so the key goes through
          -- tostring() first; otherwise string.format itself would raise
          -- instead of this function returning its error message.
          return false,
                 ("'configs' - unsupported table key in the configs: '%s'."):format(
                   tostring(k))
        elseif k == "level" and not _compression_level_configs[v] then
          return false,
                 ("'configs' - unsupported 'level': %s."):format(tostring(v))
        elseif k == "strategy" and v ~= "fixed" and v ~= "huffman_only" and
          v ~= "dynamic" then
          return false, ("'configs' - unsupported 'strategy': '%s'."):format(
                   tostring(v))
        end
      end
    end
  end
  return true, ""
end
--[[ --------------------------------------------------------------------------
Compress code
--]] --------------------------------------------------------------------------
-- Flush modes understood by the FlushWriter function that CreateWriter()
-- returns.

-- partial flush to save memory: the buffered strings are concatenated and
-- moved into the result buffer; the bits still in the cache are kept and
-- only the total bit count is returned.
local _FLUSH_MODE_MEMORY_CLEANUP = 0
-- full flush with partial bytes: the cache is padded to a byte boundary and
-- written out; the total bit count (without padding) and the complete
-- output string are returned.
local _FLUSH_MODE_OUTPUT = 1
-- write bytes to get to byte boundary: the cache is padded and written out;
-- the returned total bit count includes the padding bits.
local _FLUSH_MODE_BYTE_BOUNDARY = 2
-- no flush, just get num of bits written so far
local _FLUSH_MODE_NO_FLUSH = 3
--[[
  Create an empty writer that emits output in units of bits.
  Bits are packed least significant bit first.
  Return values:
  1. WriteBits(value, bitlen): append the low "bitlen" bits of "value".
  2. WriteString(str): append a whole string.
  3. FlushWriter(mode): flush and/or query the writer,
     see the _FLUSH_MODE_* constants.
--]]
local function CreateWriter()
  -- Bits that have not been turned into output bytes yet, and their count.
  local bit_cache = 0
  local bit_cache_len = 0
  -- Total number of bits written so far.
  local bits_written = 0
  -- Finished pieces of output and how many there are.
  local pieces = {}
  local piece_count = 0
  -- When "pieces" is big enough, it is concatenated into a single string
  -- and moved here to save memory.
  local flushed_pieces = {}

  -- Write bits with value "value" and bit length of "bitlen" into writer.
  -- @param value: The value being written
  -- @param bitlen: The bit length of "value"
  -- @return nil
  local function WriteBits(value, bitlen)
    bit_cache = bit_cache + value * _pow2[bit_cache_len]
    bit_cache_len = bit_cache_len + bitlen
    bits_written = bits_written + bitlen
    -- Only bulk to buffer every 4 bytes. This is quicker.
    if bit_cache_len < 32 then return end

    -- Peel off the four lowest bytes of the cache, lowest byte first.
    local byte1 = bit_cache % 256
    local rest = (bit_cache - byte1) / 256
    local byte2 = rest % 256
    rest = (rest - byte2) / 256
    local byte3 = rest % 256
    rest = (rest - byte3) / 256
    local byte4 = rest % 256
    piece_count = piece_count + 1
    pieces[piece_count] = _byte_to_char[byte1] .. _byte_to_char[byte2] ..
                            _byte_to_char[byte3] .. _byte_to_char[byte4]

    -- "bit_cache_len" already includes "bitlen", so the exponent below is
    -- the number of bits of "value" that went into the 4 bytes just
    -- written. The other high bits of "value" become the new cache.
    local consumed = _pow2[32 - bit_cache_len + bitlen]
    bit_cache = (value - value % consumed) / consumed
    bit_cache_len = bit_cache_len - 32
  end

  -- Write the entire string into the writer.
  -- Bits still in the cache are written out first, one byte for every
  -- started group of 8 bits.
  -- @param str The string being written
  -- @return nil
  local function WriteString(str)
    local pending_bits = bit_cache_len
    while pending_bits > 0 do
      piece_count = piece_count + 1
      pieces[piece_count] = string_char(bit_cache % 256)
      bit_cache = (bit_cache - bit_cache % 256) / 256
      pending_bits = pending_bits - 8
    end
    bit_cache_len = 0
    piece_count = piece_count + 1
    pieces[piece_count] = str
    bits_written = bits_written + #str * 8
  end

  -- Flush current stuffs in the writer and return it.
  -- This operation will free most of the memory.
  -- @param mode See the description of the _FLUSH_MODE_* constants and the
  -- source code.
  -- @return The total number of bits stored in the writer right now.
  -- for byte boundary mode, it includes the padding bits.
  -- for output mode, it does not include padding bits.
  -- @return Return the outputs if mode is output.
  local function FlushWriter(mode)
    if mode == _FLUSH_MODE_NO_FLUSH then return bits_written end

    local to_byte_boundary = (mode == _FLUSH_MODE_BYTE_BOUNDARY)
    if to_byte_boundary or mode == _FLUSH_MODE_OUTPUT then
      -- Full flush, also output cache.
      -- Need to pad some bits if bit_cache_len is not multiple of 8.
      local padding_bitlen = (8 - bit_cache_len % 8) % 8
      if bit_cache_len > 0 then
        -- Padding with all 1 bits, mainly because "\000" is not
        -- good to be transmitted. This makes "\000" a little bit
        -- less frequent.
        bit_cache = bit_cache - _pow2[bit_cache_len] +
                      _pow2[bit_cache_len + padding_bitlen]
        local pending_bits = bit_cache_len
        while pending_bits > 0 do
          piece_count = piece_count + 1
          pieces[piece_count] = _byte_to_char[bit_cache % 256]
          bit_cache = (bit_cache - bit_cache % 256) / 256
          pending_bits = pending_bits - 8
        end
        bit_cache = 0
        bit_cache_len = 0
      end
      if to_byte_boundary then
        bits_written = bits_written + padding_bitlen
        return bits_written
      end
    end

    -- Collapse everything buffered so far into one string.
    flushed_pieces[#flushed_pieces + 1] = table_concat(pieces)
    pieces = {}
    piece_count = 0

    if mode == _FLUSH_MODE_MEMORY_CLEANUP then return bits_written end
    return bits_written, table_concat(flushed_pieces)
  end

  return WriteBits, WriteString, FlushWriter
end
-- Push an element into a min heap.
-- @param heap A min heap whose smallest element is at index 1.
-- @param e The element to be pushed. Assume element "e" is a table
-- and comparison is done via its first entry e[1]
-- @param heap_size current number of elements in the heap.
-- NOTE: There may be some garbage stored in
-- heap[heap_size+1], heap[heap_size+2], etc..
-- The caller has to keep track of the new size (heap_size + 1) itself.
-- @return nil
local function MinHeapPush(heap, e, heap_size)
  local hole = heap_size + 1
  heap[hole] = e
  local key = e[1]
  -- Sift up: as long as the parent is larger than the new element, move the
  -- parent down into the hole and continue one level higher.
  local parent = (hole - hole % 2) / 2
  while parent >= 1 and heap[parent][1] > key do
    heap[hole] = heap[parent]
    hole = parent
    parent = (hole - hole % 2) / 2
  end
  heap[hole] = e
end
-- Pop the smallest element from a min heap.
-- @param heap A min heap whose smallest element is at index 1. Elements
-- are tables compared via their first entry.
-- @param heap_size current number of elements in the heap.
-- @return the popped element
-- Note: This function does not change table size of "heap" to save CPU time.
-- The popped element is left in heap[heap_size], and the caller has to keep
-- track of the new size (heap_size - 1) itself.
local function MinHeapPop(heap, heap_size)
  local top = heap[1]
  local last = heap[heap_size]
  local key = last[1]
  -- The popped element is parked in the slot that just became unused.
  heap[heap_size] = top
  local size = heap_size - 1

  -- Sift down: "last" conceptually sits in the hole at the root. While the
  -- smaller child of the hole is smaller than "last", move that child up.
  local hole = 1
  local child = 2
  while child <= size do
    local sibling = child + 1
    -- Prefer the right child only if it is strictly smaller than the left.
    if sibling <= size and heap[sibling][1] < heap[child][1] then
      child = sibling
    end
    local candidate = heap[child]
    if candidate[1] < key then
      heap[hole] = candidate
      hole = child
      child = hole * 2
    else
      break
    end
  end
  heap[hole] = last
  return top
end
-- Deflate defines a special huffman tree, which is unique once the bit length
-- of huffman code of all symbols are known.
-- @param bitlen_counts Number of symbols with a specific bitlen
-- (a missing entry counts as 0).
-- @param symbol_bitlens The bit length of each symbol
-- (symbols without an entry get no code).
-- @param max_symbol The max symbol among all symbols,
-- which is (number of symbols - 1)
-- @param max_bitlen The max huffman bit length among all symbols.
-- @return The huffman code of all symbols, with the bits already reversed.
local function GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens,
                                        max_symbol, max_bitlen)
  -- next_free_code[bitlen] is the next unassigned code of that bit length.
  local next_free_code = {}
  local first_code = 0
  for bitlen = 1, max_bitlen do
    first_code = (first_code + (bitlen_counts[bitlen - 1] or 0)) * 2
    next_free_code[bitlen] = first_code
  end

  local codes = {}
  for symbol = 0, max_symbol do
    local bitlen = symbol_bitlens[symbol]
    if bitlen then
      local code = next_free_code[bitlen]
      next_free_code[bitlen] = code + 1
      -- Reverse the bits of huffman code,
      -- because most significant bits of huffman code
      -- is stored first into the compressed data.
      -- @see RFC1951 Page5 Section 3.1.1
      if bitlen <= 9 then
        -- Have cached reverse for small bitlen.
        codes[symbol] = _reverse_bits_tbl[bitlen][code]
      else
        local reversed = 0
        for _ = 1, bitlen do
          -- Append the lowest bit of "code", then make room for the next.
          reversed = (reversed + code % 2) * 2
          code = (code - code % 2) / 2
        end
        -- Undo the one surplus left shift made by the last iteration.
        codes[symbol] = reversed / 2
      end
    end
  end
  return codes
end
-- Ordering function used to sort heap elements:
-- ascending by the first entry, ties broken by ascending second entry.
-- a[1], b[1] is the huffman frequency
-- a[2], b[2] is the symbol value.
-- @return true if "a" must come before "b".
local function SortByFirstThenSecond(a, b)
  if a[1] ~= b[1] then
    return a[1] < b[1]
  end
  return a[2] < b[2]
end
-- Calculate the huffman bit length and huffman code.
-- @param symbol_count: A table whose table key is the symbol, and table value
-- is the symbol frenquency (nil means 0 frequency).
-- @param max_bitlen: See description of return value.
-- @param max_symbol: The maximum symbol
-- @return a table whose key is the symbol, and the value is the huffman bit
-- bit length. We guarantee that all bit length <= max_bitlen.
-- For 0<=symbol<=max_symbol, table value could be nil if the frequency
-- of the symbol is 0 or nil.
-- @return a table whose key is the symbol, and the value is the huffman code.
-- @return a number indicating the maximum symbol whose bitlen is not 0.
local function GetHuffmanBitlenAndCode(symbol_counts, max_bitlen, max_symbol)
local heap_size
local max_non_zero_bitlen_symbol = -1
local leafs = {}
local heap = {}
local symbol_bitlens = {}
local symbol_codes = {}
local bitlen_counts = {}
--[[
tree[1]: weight, temporarily used as parent and bitLengths
tree[2]: symbol
tree[3]: left child
tree[4]: right child
--]]
local number_unique_symbols = 0
for symbol, count in pairs(symbol_counts) do
number_unique_symbols = number_unique_symbols + 1
leafs[number_unique_symbols] = {count, symbol}
end
if (number_unique_symbols == 0) then
-- no code.
return {}, {}, -1
elseif (number_unique_symbols == 1) then
-- Only one code. In this case, its huffman code
-- needs to be assigned as 0, and bit length is 1.
-- This is the only case that the return result
-- represents an imcomplete huffman tree.
local symbol = leafs[1][2]
symbol_bitlens[symbol] = 1
symbol_codes[symbol] = 0
return symbol_bitlens, symbol_codes, symbol