-
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathSimpleHuffman.pas
3369 lines (3001 loc) · 118 KB
/
SimpleHuffman.pas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{-------------------------------------------------------------------------------
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
-------------------------------------------------------------------------------}
{===============================================================================
SimpleHuffman
Provides classes for encoding and decoding of data using a Huffman tree,
as a method of loss-less data compression.
Note that this entire library was written "in the blind" - that is, with no
access to internet or any other relevant information source.
Current implementation is based pretty much only on a short video describing
how the Huffman tree works, that is all.
Therefore, the implementation is naive, used nomenclature is completely
arbitrary, there are no performance optimizations and it is probably bugged.
It was written only as a product of curiosity and should be seen as such.
Version 1.0 alpha 2 (2024-05-03)
Last change 2024-05-03
©2023-2024 František Milt
Contacts:
František Milt: [email protected]
Support:
If you find this code useful, please consider supporting its author(s) by
making a small donation using the following link(s):
https://www.paypal.me/FMilt
Changelog:
For detailed changelog and history please refer to this git repository:
github.com/TheLazyTomcat/Lib.SimpleHuffman
Dependencies:
AuxClasses - github.com/TheLazyTomcat/Lib.AuxClasses
* AuxExceptions - github.com/TheLazyTomcat/Lib.AuxExceptions
AuxTypes - github.com/TheLazyTomcat/Lib.AuxTypes
BitOps - github.com/TheLazyTomcat/Lib.BitOps
StaticMemoryStream - github.com/TheLazyTomcat/Lib.StaticMemoryStream
StrRect - github.com/TheLazyTomcat/Lib.StrRect
Library AuxExceptions is required only when rebasing local exception classes
(see symbol SimpleHuffman_UseAuxExceptions for details).
Library AuxExceptions might also be required as an indirect dependency.
Indirect dependencies:
BasicUIM - github.com/TheLazyTomcat/Lib.BasicUIM
SimpleCPUID - github.com/TheLazyTomcat/Lib.SimpleCPUID
UInt64Utils - github.com/TheLazyTomcat/Lib.UInt64Utils
WinFileInfo - github.com/TheLazyTomcat/Lib.WinFileInfo
===============================================================================}
unit SimpleHuffman;
{
SimpleHuffman_UseAuxExceptions
If you want library-specific exceptions to be based on more advanced classes
provided by AuxExceptions library instead of basic Exception class, and don't
want to or cannot change code in this unit, you can define global symbol
SimpleHuffman_UseAuxExceptions to achieve this.
}
{$IF Defined(SimpleHuffman_UseAuxExceptions)}
{$DEFINE UseAuxExceptions}
{$IFEND}
//------------------------------------------------------------------------------
{$IFDEF FPC}
{$MODE ObjFPC}
{$ENDIF}
{$H+}
//------------------------------------------------------------------------------
{
UseSecureTreeTraversal
When defined, decoding and decoded size obtaining uses more secure, but much
slower implementation.
Not defined by default.
To enable/define this symbol in a project without changing this library,
define project-wide symbol SimpleHuffman_UseSecureTreeTraversal_ON.
}
{$UNDEF UseSecureTreeTraversal}
{$IFDEF SimpleHuffman_UseSecureTreeTraversal_ON}
{$DEFINE UseSecureTreeTraversal}
{$ENDIF}
interface
uses
SysUtils, Classes,
AuxTypes, AuxClasses{$IFDEF UseAuxExceptions}, AuxExceptions{$ENDIF};
{===============================================================================
Library-specific exceptions
===============================================================================}
type
ESHException = class({$IFDEF UseAuxExceptions}EAEGeneralException{$ELSE}Exception{$ENDIF});
ESHInvalidValue = class(ESHException);
ESHInvalidState = class(ESHException);
ESHInvalidOperation = class(ESHException);
ESHIndexOutOfBounds = class(ESHException);
ESHBufferTooSmall = class(ESHException);
{===============================================================================
--------------------------------------------------------------------------------
THuffmanTree
--------------------------------------------------------------------------------
===============================================================================}
const
SH_HUFFTREE_LIST_BYTENODES = 0;
SH_HUFFTREE_LIST_TREENODES = 1;
type
TSHNodeKind = (nkByteSaved,nkByteUnsaved,nkBranch,nkTerminator);
// 256 bits, anyway what is the worst-case scenario?
TSHBitSequenceData = packed array[0..31] of UInt8;
TSHBitSequence = record
Length: TMemSize;
Data: TSHBitSequenceData;
end;
PSHHuffmanTreeNode = ^TSHHuffmanTreeNode;
TSHHuffmanTreeNode = record
NodeKind: TSHNodeKind;
ByteIndex: UInt8;
TreeIndex: Integer;
Frequency: Int64;
BitSequence: TSHBitSequence;
ParentNode: PSHHuffmanTreeNode;
ParentPath: Boolean;
ChildNodes: array[Boolean] of PSHHuffmanTreeNode;
SiblingNode: PSHHuffmanTreeNode;
end;
type
TSHTreeSavingScheme = (tssFullFreq,tssAvrgDiff);
TSHTreeSavingFlag = (tsfNone); // none implemented atm.
TSHTreeSavingFlags = set of TSHTreeSavingFlag;
{===============================================================================
THuffmanTree - class declaration
===============================================================================}
type
THuffmanTree = class(TCustomMultiListObject)
protected
fByteNodes: array[UInt8] of TSHHuffmanTreeNode;
fTerminatorNode: TSHHuffmanTreeNode;
fTreeNodes: array of PSHHuffmanTreeNode;
fTreeNodeCount: Integer;
fRootNode: PSHHuffmanTreeNode;
// getters, setters
Function GetByteNode(Index: Integer): TSHHuffmanTreeNode; virtual;
Function GetFrequency(Index: Integer): Int64; virtual;
procedure SetFrequency(Index: Integer; Value: Int64); virtual;
Function GetBitSequence(Index: Integer): TSHBitSequence; virtual;
Function GetNodeSaved(Index: Integer): Boolean; virtual;
procedure SetNodeSaved(Index: Integer; Value: Boolean); virtual;
Function GetTreeNodePtr(Index: Integer): PSHHuffmanTreeNode; virtual;
Function GetTreeNode(Index: Integer): TSHHuffmanTreeNode; virtual;
// inherited protected list methods
Function GetCapacity(List: Integer): Integer; override;
procedure SetCapacity(List,Value: Integer); override;
Function GetCount(List: Integer): Integer; override;
procedure SetCount(List,Value: Integer); override;
// tree building
Function AddTreeNode(Node: PSHHuffmanTreeNode): Integer; virtual;
{
streaming
*_0 ... tssFullFreq
*_1 ... tssAvrgDiff
}
Function CountSavedByteNodes: Integer; virtual;
Function EncodeHeader(Scheme: TSHTreeSavingScheme; Flags: TSHTreeSavingFlags): UInt8; virtual;
procedure DecodeHeader(Header: UInt8; out Scheme: TSHTreeSavingScheme; out Flags: TSHTreeSavingFlags); virtual;
Function PreloadAllocationSize(Stream: TStream): TMemSize; virtual;
Function StreamingSize_0(out FreqBits: Integer): TMemSize; virtual;
Function StreamingSize_1(out AvrgFreq: Int64; out AvrgBits,DiffBits: Integer): TMemSize; virtual;
procedure SaveToBuffer_0(out Buffer; Size: TMemSize); virtual;
procedure SaveToBuffer_1(out Buffer; Size: TMemSize); virtual;
procedure LoadFromBuffer_0(const Buffer; Size: TMemSize); virtual;
procedure LoadFromBuffer_1(const Buffer; Size: TMemSize); virtual;
// initialization and finalization
procedure ClearByteNodes; virtual;
procedure ClearTreeNodes; virtual;
procedure Initialize; virtual;
procedure Finalize; virtual;
public
constructor Create;
destructor Destroy; override;
// inherited public list methods
Function LowIndex(List: Integer): Integer; override;
Function HighIndex(List: Integer): Integer; override;
// new list methods
Function LowByteNodeIndex: Integer; virtual;
Function HighByteNodeIndex: Integer; virtual;
Function CheckByteNodeIndex(Index: UInt8): Boolean; virtual;
Function LowTreeNodeIndex: Integer; virtual;
Function HighTreeNodeIndex: Integer; virtual;
Function CheckTreeNodeIndex(Index: Integer): Boolean; virtual;
// frequency methods
Function IncreaseFrequency(ByteIndex: UInt8): Int64; virtual;
// tree methods
Function TreeIsReady: Boolean; virtual;
procedure ConstructTree; virtual;
procedure BuildTree(Frequencies: array of Int64); virtual;
procedure CopyTree(Tree: THuffmanTree); virtual;
Function SameTree(Tree: THuffmanTree): Boolean; virtual;
procedure ClearTree; virtual;
{
TraverseTree
Set TreeNodeIndex to an invalid (eg. negative) value before first call to
TraverseTree.
Returns true when returned TreeNodeIndex points to a branch node, false
otherwise.
}
Function TraverseTree(BitPath: Boolean; var TreeNodeIndex: Integer): Boolean; virtual;
// streaming (saving, loading)
Function StreamingSize(Scheme: TSHTreeSavingScheme = tssFullFreq): TMemSize; virtual;
Function BestStreamingSize(out Scheme: TSHTreeSavingScheme): TMemSize; virtual;
procedure SaveToBuffer(out Buffer; Size: TMemSize; Scheme: TSHTreeSavingScheme = tssFullFreq); virtual;
procedure SaveToStream(Stream: TStream; Scheme: TSHTreeSavingScheme = tssFullFreq); virtual;
procedure SaveToFile(const FileName: String; Scheme: TSHTreeSavingScheme = tssFullFreq); virtual;
procedure LoadFromBuffer(const Buffer; Size: TMemSize; out Scheme: TSHTreeSavingScheme); overload; virtual;
procedure LoadFromBuffer(const Buffer; Size: TMemSize); overload; virtual;
procedure LoadFromStream(Stream: TStream; out Scheme: TSHTreeSavingScheme); overload; virtual;
procedure LoadFromStream(Stream: TStream); overload; virtual;
procedure LoadFromFile(const FileName: String; out Scheme: TSHTreeSavingScheme); overload; virtual;
procedure LoadFromFile(const FileName: String); overload; virtual;
{
properties
Use tree node indices (eg. LowTreeNodeIndex) for properties TreeNodePtrs
and TreeNodes, for all other array properties use byte indices
(LowByteNodeIndex, ...).
}
property ByteNodes[Index: Integer]: TSHHuffmanTreeNode read GetByteNode; default;
property Frequencies[Index: Integer]: Int64 read GetFrequency write SetFrequency;
property BitSequences[Index: Integer]: TSHBitSequence read GetBitSequence;
{
NodeSaved[]
When working with a data that can only contain a limited set of bytes, it
might be desirable to not save full frequency list, only what is needed.
You can do so by setting NodeSaved property of bytes that are not present
to false (they all are true by default).
WARNING - remember to do the same before loading a saved tree.
}
property NodeSaved[Index: Integer]: Boolean read GetNodeSaved write SetNodeSaved;
property TreeNodePtrs[Index: Integer]: PSHHuffmanTreeNode read GetTreeNodePtr;
property TreeNodes[Index: Integer]: TSHHuffmanTreeNode read GetTreeNode;
property TerminatorNode: TSHHuffmanTreeNode read fTerminatorNode;
property RootNode: PSHHuffmanTreeNode read fRootNode;
property ByteNodeCount: Integer index SH_HUFFTREE_LIST_BYTENODES read GetCount;
property TreeNodeCount: Integer index SH_HUFFTREE_LIST_TREENODES read GetCount;
end;
{===============================================================================
--------------------------------------------------------------------------------
THuffmanBase
--------------------------------------------------------------------------------
===============================================================================}
{===============================================================================
THuffmanBase - class declaration
===============================================================================}
type
THuffmanBase = class(TCustomObject)
protected
fHuffmanTree: THuffmanTree;
fStreamBufferSize: TMemSize;
fUncompressedSize: Int64;
fCompressedSize: Int64;
fCompressionRatio: Double;
fBreakProcessing: Boolean;
fScanInitialized: Boolean;
fScanFinalized: Boolean;
fScanProgressCallback: TProgressCallback;
fScanProgressEvent: TProgressEvent;
procedure DoScanProgress(Progress: Double); virtual;
procedure Initialize; virtual;
procedure Finalize; virtual;
public
class Function StreamBufferSizeDefault: TMemSize; virtual;
constructor Create;
destructor Destroy; override;
{
BreakProcessing
You can call this method from within progress event or callback to break
out from a long processing.
This affects scanning (Scan*), encoded size obtaining (EncodedSize*),
encoding (Encode*), decoded size obtaining (DecodedSize*) and decoding
(Decode*).
}
Function BreakProcessing: Boolean; virtual;
{
scanning
Before encoding or decoding, it is necessary to provide a complete huffman
tree. For decoding, this is often done by loading a tree saved in encoding.
For encoding, the tree usually is not available in pre-computed form, but
must be computed for the specific data being encoded. Scanning is here for
this purpose.
Pass all data that are to be encoded to the scanning, and the tree will be
constructed from them. Also, the scanning returns number of bytes the data
will occupy when encoded.
}
procedure ScanInit; virtual;
procedure ScanUpdate(const Buffer; Size: TMemSize); virtual;
Function ScanFinal: Int64; virtual;
// scanning macros
Function ScanMemory(Memory: Pointer; Size: TMemSize): Int64; virtual;
Function ScanBuffer(const Buffer; Size: TMemSize): Int64; virtual;
Function ScanAnsiString(const Str: AnsiString): Int64; virtual;
Function ScanWideString(const Str: WideString): Int64; virtual;
Function ScanString(const Str: String): Int64; virtual;
Function ScanStream(Stream: TStream; Count: Int64 = -1): Int64; virtual;
Function ScanFile(const FileName: String): Int64; virtual;
// properties
property HuffmanTree: THuffmanTree read fHuffmanTree;
property StreamBufferSize: TMemSize read fStreamBufferSize write fStreamBufferSize;
property UncompressedSize: Int64 read fUncompressedSize;
property CompressedSize: Int64 read fCompressedSize;
property CompressionRatio: Double read fCompressionRatio;
// events
property OnScanProgressCallback: TProgressCallback read fScanProgressCallback write fScanProgressCallback;
property OnScanProgressEvent: TProgressEvent read fScanProgressEvent write fScanProgressEvent;
property OnScanProgress: TProgressEvent read fScanProgressEvent write fScanProgressEvent;
end;
{===============================================================================
--------------------------------------------------------------------------------
THuffmanEncoder
--------------------------------------------------------------------------------
===============================================================================}
{===============================================================================
THuffmanEncoder - class declaration
===============================================================================}
type
THuffmanEncoder = class(THuffmanBase)
protected
fEncodedSizeInitialized: Boolean;
fEncodedSizeFinalized: Boolean;
fEncodedSizeCounters: array[UInt8] of Int64;
fEncodedSizeProgressCallback: TProgressCallback;
fEncodedSizeProgressEvent: TProgressEvent;
fEncodeInitialized: Boolean;
fEncodeFinalized: Boolean;
fEncodeBuffer: Pointer;
fEncodeBufferBitCount: TMemSize;
fEncodeProgressCallback: TProgressCallback;
fEncodeProgressEvent: TProgressEvent;
procedure DoEncodedSizeProgress(Progress: Double); virtual;
procedure DoEncodeProgress(Progress: Double); virtual;
procedure Initialize; override;
procedure Finalize; override;
public
{
encoded size
Use encoded size obtaining to get number of bytes the provided data will
occupy when encoded using current huffman tree (the tree must be prepared).
Data are usually scanned before encoding, and the scanning provides the
encoded size too, therefore it is normally not necessaty to use encoded
size obtaining. It is here for a case where pre-computed huffman tree is
used for encoding, so scanning is not called, and one still needs to obtain
the encoded size.
Note that obtaining the encoded size is very fast, as there is little
processing done. The performance depends pretty much only on how fast can
the data be served.
}
procedure EncodedSizeInit; virtual;
procedure EncodedSizeUpdate(const Buffer; Size: TMemSize); virtual;
Function EncodedSizeFinal: Int64; virtual;
// encoded size macros
Function EncodedSizeMemory(Memory: Pointer; Size: TMemSize): Int64; virtual;
Function EncodedSizeBuffer(const Buffer; Size: TMemSize): Int64; virtual;
Function EncodedSizeAnsiString(const Str: AnsiString): Int64; virtual;
Function EncodedSizeWideString(const Str: WideString): Int64; virtual;
Function EncodedSizeString(const Str: String): Int64; virtual;
Function EncodedSizeStream(Stream: TStream; Count: Int64 = -1): Int64; virtual;
Function EncodedSizeFile(const FileName: String): Int64; virtual;
// encoding
{
EncodeInit
Initializes encoding.
Huffman tree must be prepared before calling EncodeInit.
}
procedure EncodeInit; virtual;
{
EncodeUpdate
Tries to encode as many input bytes as possible. The function returns when
either all input bytes were consumed or when both the output buffer and
internal processing buffer are full.
Set SizeIn to size of the BufferIn and SizeOut to size of BufferOut (this
buffer must be allocated by the caller).
Upon return, the SizeIn will contain number of bytes that were consumed
from BufferIn within the call, which might be less than was passed. SizeOut
will contain number of bytes that were written into BufferOut (also might
be less than specified).
}
procedure EncodeUpdate(const BufferIn; var SizeIn: TMemSize; out BufferOut; var SizeOut: TMemSize); virtual;
{
EncodeFinal
Stores all data held in the internal processing buffer into provided output
buffer (if they fit) and finalizes processing.
Set SizeOut to a size of BufferOut.
When the funtion succeeds (returns true), then the SizeOut will contain
number of bytes written into the BufferOut.
When the function fails (returns false), it can only be due to output
buffer being too small. The SizeOut will then be set to a minimum size the
output buffer needs to be (reallocate it as such and call this function
again).
}
Function EncodeFinal(out BufferOut; var SizeOut: TMemSize): Boolean; virtual;
{
encoding macros
WARNING - MemoryOut/BufferOut output buffers must be allocated by the
caller and SizeOut must contain their allocated size. StrOut
string variable parameters must also be allocated (string length
set).
You can get the required size, in bytes, when scanning the data,
or, for pre-computed huffman tree, using method for encoded size
obtaining (EncodedSize*)
}
procedure EncodeMemory(MemoryIn: Pointer; SizeIn: TMemSize; MemoryOut: Pointer; SizeOut: TMemSize); virtual;
procedure EncodeBuffer(const BufferIn; SizeIn: TMemSize; out BufferOut; SizeOut: TMemSize); virtual;
procedure EncodeAnsiString(const StrIn: AnsiString; var StrOut: AnsiString); virtual;
procedure EncodeWideString(const StrIn: WideString; var StrOut: WideString); virtual;
procedure EncodeString(const StrIn: String; var StrOut: String); virtual;
procedure EncodeStream(StreamIn: TStream; CountIn: Int64; StreamOut: TStream); overload; virtual;
procedure EncodeStream(StreamIn: TStream; StreamOut: TStream); overload; virtual;
procedure EncodeFile(const FileNameIn,FileNameOut: String); virtual;
// properties
property OnEncodedSizeProgressCallback: TProgressCallback read fEncodedSizeProgressCallback write fEncodedSizeProgressCallback;
property OnEncodedSizeProgressEvent: TProgressEvent read fEncodedSizeProgressEvent write fEncodedSizeProgressEvent;
property OnEncodedSizeProgress: TProgressEvent read fEncodedSizeProgressEvent write fEncodedSizeProgressEvent;
property OnEncodeProgressCallback: TProgressCallback read fEncodeProgressCallback write fEncodeProgressCallback;
property OnEncodeProgressEvent: TProgressEvent read fEncodeProgressEvent write fEncodeProgressEvent;
property OnEncodeProgress: TProgressEvent read fEncodeProgressEvent write fEncodeProgressEvent;
end;
{===============================================================================
--------------------------------------------------------------------------------
THuffmanDecoder
--------------------------------------------------------------------------------
===============================================================================}
type
// for internal use only
TSHDecodedSizeContext = record
TreeNodeIndex: Integer;
Counter: Int64;
Terminated: Boolean;
end;
// for internal use only
TSHDecodeContext = record
TreeNodeIndex: Integer;
TransferInBits: NativeUInt;
TransferInBitCount: TMemSize;
TransferOutByte: UInt8;
TransferOutByteSet: Boolean;
Terminated: Boolean;
end;
{===============================================================================
THuffmanDecoder - class declaration
===============================================================================}
type
THuffmanDecoder = class(THuffmanBase)
protected
fDecodedSizeInitialized: Boolean;
fDecodedSizeFinalized: Boolean;
fDecodedSizeContext: TSHDecodedSizeContext;
fDecodedSizeProgressCallback: TProgressCallback;
fDecodedSizeProgressEvent: TProgressEvent;
fDecodeInitialized: Boolean;
fDecodeFinalized: Boolean;
fDecodeContext: TSHDecodeContext;
fDecodeProgressCallback: TProgressCallback;
fDecodeProgressEvent: TProgressEvent;
procedure DoDecodedSizeProgress(Progress: Double); virtual;
procedure DoDecodeProgress(Progress: Double); virtual;
procedure Initialize; override;
procedure DecodeUpdateInternal(const BufferIn; var SizeIn: TMemSize; out BufferOut; var SizeOut: TMemSize); virtual;
public
{
decoded size
Scans the provided encoded data and calculates number of bytes necessary to
store the same data in decoded state (ie. decoded size).
WARNING - this process is more-or-less the same as full decoding, only
the decoded data are not saved anywhere, which means obtaining
decoded size can be quite time consuming.
It is therefore recommended, if the size is required prior to
decoding, to store unencoded size with the encoded data and
only load it.
}
procedure DecodedSizeInit; virtual;
Function DecodedSizeUpdate(const Buffer; Size: TMemSize): Boolean; virtual;
Function DecodedSizeFinal: Int64; virtual;
// decoded size macros
Function DecodedSizeMemory(Memory: Pointer; Size: TMemSize): Int64; virtual;
Function DecodedSizeBuffer(const Buffer; Size: TMemSize): Int64; virtual;
Function DecodedSizeAnsiString(const Str: AnsiString): Int64; virtual;
Function DecodedSizeWideString(const Str: WideString): Int64; virtual;
Function DecodedSizeString(const Str: String): Int64; virtual;
Function DecodedSizeStream(Stream: TStream; Count: Int64 = -1): Int64; virtual;
Function DecodedSizeFile(const FileName: String): Int64; virtual;
// decoding
{
Initializes decoding.
The tree must be prepared before making the call.
}
procedure DecodeInit; virtual;
{
Decodes as many bits from input buffer as possible. The decoding stops
either when all input bytes are consumed, when the output buffer becomes
full or when termination sequence is encountered.
The parameters work the same as in THuffmanEncoder.EncodeUpdate, see there
for details.
Returns true when termination seqence was not yet encountered, false when
it was. If termination sequence was encountered, you should stop decoding
right there, as no more data will be decoded even if you will provide more
input data.
}
Function DecodeUpdate(const BufferIn; var SizeIn: TMemSize; out BufferOut; var SizeOut: TMemSize): Boolean; virtual;
{
DecodeFinal
Only finalizes the decoding, no further processing is required.
If the encoded data were not complete then this method raises an exception
of type ESHInvalidState.
}
procedure DecodeFinal; virtual;
// decoding macros
procedure DecodeMemory(MemoryIn: Pointer; SizeIn: TMemSize; MemoryOut: Pointer; SizeOut: TMemSize); virtual;
procedure DecodeBuffer(const BufferIn; SizeIn: TMemSize; out BufferOut; SizeOut: TMemSize); virtual;
procedure DecodeAnsiString(const StrIn: AnsiString; var StrOut: AnsiString); virtual;
procedure DecodeWideString(const StrIn: WideString; var StrOut: WideString); virtual;
procedure DecodeString(const StrIn: String; var StrOut: String); virtual;
procedure DecodeStream(StreamIn: TStream; CountIn: Int64; StreamOut: TStream); overload; virtual;
procedure DecodeStream(StreamIn: TStream; StreamOut: TStream); overload; virtual;
procedure DecodeFile(const FileNameIn,FileNameOut: String); virtual;
// properties
property OnDecodedSizeProgressCallback: TProgressCallback read fDecodedSizeProgressCallback write fDecodedSizeProgressCallback;
property OnDecodedSizeProgressEvent: TProgressEvent read fDecodedSizeProgressEvent write fDecodedSizeProgressEvent;
property OnDecodedSizeProgress: TProgressEvent read fDecodedSizeProgressEvent write fDecodedSizeProgressEvent;
property OnDecodeProgressCallback: TProgressCallback read fDecodeProgressCallback write fDecodeProgressCallback;
property OnDecodeProgressEvent: TProgressEvent read fDecodeProgressEvent write fDecodeProgressEvent;
property OnDecodeProgress: TProgressEvent read fDecodeProgressEvent write fDecodeProgressEvent;
end;
implementation
uses
Math,
BitOps, StaticMemoryStream, StrRect;
{===============================================================================
Implementation constants
===============================================================================}
const
{$IF SizeOf(NativeUInt) = 8}
NativeUIntSizeShift = 3;
{$ELSEIF SizeOf(NativeUInt) = 4}
NativeUIntSizeShift = 2;
{$ELSE}
{$MESSAGE FATAL 'Unsupported NativeUInt size.'}
{$IFEND}
{===============================================================================
Auxiliary functions
===============================================================================}
procedure PutBitsAndMoveDest(var Destination: PUInt8; DstBitOffset: TMemSize; Source: PUInt8; BitCount: TMemSize);
var
Mask: UInt8;
Buffer: UInt8;
begin
{
This function assumes that destination and source are at completely different
memory locations (they do not overlap) and DstBitOffset is in interval [0,7].
}
If (DstBitOffset <> 0) or ((BitCount and 7) <> 0) then
begin
// arbitrary bitstring or bit position
// copy full octets
If DstBitOffset <> 0 then
begin
Mask := UInt8(UInt8($FF) shl DstBitOffset);
while BitCount >= 8 do
begin
Buffer := Source^;
Destination^ := (Destination^ and not Mask) or UInt8(Buffer shl DstBitOffset);
Inc(Destination);
Destination^ := (Destination^ and Mask) or (Buffer shr (8 - DstBitOffset));
// do NOT increase destination again
Inc(Source);
Dec(BitCount,8);
end;
end
else
begin
while BitCount >= 8 do
begin
Destination^ := Source^;
Inc(Destination);
Inc(Source);
Dec(BitCount,8);
end;
end;
// copy remaining bits, if any
If BitCount > 0 then
begin
Buffer := Source^;
If BitCount > (8 - DstBitOffset) then
begin
// writing into two bytes (note that if here, DstBitOffset cannot be 0)
Destination^ := (Destination^ and {Mask}(UInt8($FF) shr (8 - DstBitOffset))) or UInt8(Buffer shl DstBitOffset);
Inc(Destination);
Mask := UInt8(UInt8($FF) shl ((BitCount + DstBitOffset) and 7));
Destination^ := (Destination^ and Mask) or ((Buffer shr (8 - DstBitOffset)) and not Mask);
end
else
begin
// writing into only one byte
Mask := UInt8(UInt8($FF) shl DstBitOffset) and (UInt8($FF) shr (8 - DstBitOffset - BitCount));
Destination^ := (Destination^ and not Mask) or (UInt8(Buffer shl DstBitOffset) and Mask);
If (DstBitOffset + BitCount) >= 8 then
Inc(Destination);
end;
end;
end
else
begin
// integral bytes on byte boundary
Move(Source^,Destination^,BitCount shr 3);
Inc(Destination,BitCount shr 3);
end;
end;
//------------------------------------------------------------------------------
Function LoadIntegerBits(var Source: PUInt8; SrcBitOffset,BitCount: TMemSize): Int64;
var
DstBitOffset: TMemSize;
begin
// This function assumes that SrcBitOffset is in interval [0,7].
If BitCount < 64 then
begin
If BitCount > 0 then
begin
If SrcBitOffset <> 0 then
begin
If BitCount >= (8 - SrcBitOffset) then
begin
Result := Source^ shr SrcBitOffset;
Inc(Source);
Dec(BitCount,8 - SrcBitOffset);
DstBitOffset := 8 - SrcBitOffset;
while BitCount >= 8 do
begin
Result := Result or (Int64(Source^) shl DstBitOffset);
Dec(BitCount,8);
Inc(Source);
Inc(DstBitOffset,8);
end;
If BitCount > 0 then
Result := Result or (Int64(Source^ and (UInt8($FF) shr (8 - BitCount))) shl DstBitOffset);
end
else Result := (Source^ shr SrcBitOffset) and (UInt8($FF) shr (8 - BitCount));
end
else
begin
Move(Source^,Addr(Result)^,(BitCount + 7) shr 3);
// mask bits that are not supposed to be in the result
{$IFDEF ENDIAN_BIG}
Result := Int64(SwapEndian(UInt64(Result))) and (Int64($FFFFFFFFFFFFFFFF) shr (64 - BitCount));
{$ELSE}
Result := Result and (Int64($FFFFFFFFFFFFFFFF) shr (64 - BitCount));
{$ENDIF}
Inc(Source,BitCount shr 3);
end;
end
else Result := 0;
end
else raise ESHInvalidValue.CreateFmt('LoadFrequencyBits: Invalid value of frequency bits (%d).',[BitCount]);
end;
//------------------------------------------------------------------------------
Function TreeSavingSchemeToNum(Scheme: TSHTreeSavingScheme): Integer;
begin
case Scheme of
tssFullFreq: Result := 0;
tssAvrgDiff: Result := 1;
else
raise ESHInvalidValue.CreateFmt('TreeSavingSchemeToNum: Invalid tree saving scheme (%d).',[Ord(Scheme)]);
end;
end;
//------------------------------------------------------------------------------
Function NumToTreeSavingScheme(Num: Integer): TSHTreeSavingScheme;
begin
case Num of
0: Result := tssFullFreq;
1: Result := tssAvrgDiff;
else
raise ESHInvalidValue.CreateFmt('NumToTreeSavingScheme: Invalid tree saving scheme (%d).',[Num]);
end;
end;
//------------------------------------------------------------------------------
Function TreeSavingFlagsToNum(Flags: TSHTreeSavingFlags): Integer;
begin
Result := 0;
If tsfNone in Flags then; // do nothing, no flag implemented
end;
//------------------------------------------------------------------------------
Function NumToTreeSavingFlags(Num: Integer): TSHTreeSavingFlags;
begin
Result := [];
If Num and 1 <> 0 then; // do nothing, no flag implemented
end;
{===============================================================================
--------------------------------------------------------------------------------
THuffmanTree
--------------------------------------------------------------------------------
===============================================================================}
{===============================================================================
THuffmanTree - class implementation
===============================================================================}
{-------------------------------------------------------------------------------
THuffmanTree - protected methods
-------------------------------------------------------------------------------}
Function THuffmanTree.GetByteNode(Index: Integer): TSHHuffmanTreeNode;
begin
If CheckByteNodeIndex(Index) then
Result := fByteNodes[UInt8(Index)]
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.GetByteNode: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetFrequency(Index: Integer): Int64;
begin
If CheckByteNodeIndex(Index) then
Result := fByteNodes[UInt8(Index)].Frequency
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.GetFrequency: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
procedure THuffmanTree.SetFrequency(Index: Integer; Value: Int64);
begin
If CheckByteNodeIndex(Index) then
fByteNodes[UInt8(Index)].Frequency := Value
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.SetFrequency: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetBitSequence(Index: Integer): TSHBitSequence;
begin
If CheckByteNodeIndex(Index) then
Result := fByteNodes[UInt8(Index)].BitSequence
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.GetBitSequence: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetNodeSaved(Index: Integer): Boolean;
begin
If CheckByteNodeIndex(Index) then
Result := (fByteNodes[UInt8(Index)].NodeKind = nkByteSaved)
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.GetNodeSaved: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
procedure THuffmanTree.SetNodeSaved(Index: Integer; Value: Boolean);
begin
If CheckByteNodeIndex(Index) then
begin
// do not clear frequency
If Value then
fByteNodes[UInt8(Index)].NodeKind := nkByteSaved
else
fByteNodes[UInt8(Index)].NodeKind := nkByteUnsaved
end
else raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.SetNodeSaved: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetTreeNodePtr(Index: Integer): PSHHuffmanTreeNode;
begin
If CheckTreeNodeIndex(Index) then
Result := fTreeNodes[Index]
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.GetTreeNodePtr: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetTreeNode(Index: Integer): TSHHuffmanTreeNode;
begin
If CheckTreeNodeIndex(Index) then
Result := fTreeNodes[Index]^
else
raise ESHIndexOutOfBounds.CreateFmt('THuffmanTree.GetTreeNode: Index (%d) ouf of bounds.',[Index]);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetCapacity(List: Integer): Integer;
begin
case List of
SH_HUFFTREE_LIST_BYTENODES: Result := Length(fByteNodes);
SH_HUFFTREE_LIST_TREENODES: Result := Length(fTreeNodes);
else
raise ESHInvalidValue.CreateFmt('THuffmanTree.GetCapacity: Invalid list index (%d).',[List]);
end;
end;
//------------------------------------------------------------------------------
procedure THuffmanTree.SetCapacity(List,Value: Integer);
begin
case List of
SH_HUFFTREE_LIST_BYTENODES: raise ESHInvalidOperation.Create('THuffmanTree.SetCapacity: Cannot change capacity of byte nodes.');
{
Since capacity of tree nodes is only changed internally, and then only
increased, there is no need for complex checks and data protection.
}
SH_HUFFTREE_LIST_TREENODES: SetLength(fTreeNodes,Value);
else
raise ESHInvalidValue.CreateFmt('THuffmanTree.SetCapacity: Invalid list index (%d).',[List]);
end;
end;
//------------------------------------------------------------------------------
Function THuffmanTree.GetCount(List: Integer): Integer;
begin
case List of
SH_HUFFTREE_LIST_BYTENODES: Result := Length(fByteNodes);
SH_HUFFTREE_LIST_TREENODES: Result := fTreeNodeCount;
else
raise ESHInvalidValue.CreateFmt('THuffmanTree.GetCount: Invalid list index (%d).',[List]);
end;
end;
//------------------------------------------------------------------------------
procedure THuffmanTree.SetCount(List,Value: Integer);
begin
Value := List; // only to prevent warnings
case Value of
SH_HUFFTREE_LIST_BYTENODES,
SH_HUFFTREE_LIST_TREENODES:;
else
raise ESHInvalidValue.CreateFmt('THuffmanTree.SetCount: Invalid list index (%d).',[List]);
end;
end;
//------------------------------------------------------------------------------
Function THuffmanTree.AddTreeNode(Node: PSHHuffmanTreeNode): Integer;
var
i: Integer;
begin
{
NOTE - nodes are ordered from lowest frequency to highest.
}
Grow(SH_HUFFTREE_LIST_TREENODES);
// find index where to put the new node
Result := fTreeNodeCount;
For i := LowTreeNodeIndex to HighTreeNodeIndex do
If Node^.Frequency < fTreeNodes[i]^.Frequency then
begin
Result := i;
Break{For i};
end;
// inser the node at selected position
For i := HighTreeNodeIndex downto Result do
fTreeNodes[i + 1] := fTreeNodes[i];
fTreeNodes[Result] := Node;
Inc(fTreeNodeCount);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.CountSavedByteNodes: Integer;
var
i: Integer;
begin
Result := 0;
For i := LowByteNodeIndex to HighByteNodeIndex do
If fByteNodes[UInt8(i)].NodeKind = nkByteSaved then
Inc(Result);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.EncodeHeader(Scheme: TSHTreeSavingScheme; Flags: TSHTreeSavingFlags): UInt8;
begin
// scheme number
Result := TreeSavingSchemeToNum(Scheme) and $F;
// flag bits
Result := Result or ((TreeSavingFlagsToNum(Flags) and $F) shl 4);
end;
//------------------------------------------------------------------------------
procedure THuffmanTree.DecodeHeader(Header: UInt8; out Scheme: TSHTreeSavingScheme; out Flags: TSHTreeSavingFlags);
begin
Scheme := NumToTreeSavingScheme(Header and $F);
Flags := NumToTreeSavingFlags((Header shr 4) and $F);
end;
//------------------------------------------------------------------------------
Function THuffmanTree.PreloadAllocationSize(Stream: TStream): TMemSize;
var
OrigPos: Int64;
Buffer: array[0..15] of UInt8;
Scheme: TSHTreeSavingScheme;
Flags: TSHTreeSavingFlags;
AvrgBits: Integer;
DiffBits: Integer;
begin
Result := 0;
OrigPos := Stream.Position;
try
If (Stream.Size - Stream.Position) >= 1 then
begin
// load and decode header to get saving scheme
Stream.ReadBuffer(Addr(Buffer)^,1);
DecodeHeader(Buffer[Low(Buffer)],Scheme,Flags);
case Scheme of
tssFullFreq:
If (Stream.Size - Stream.Position) >= 1{6 bits} then
begin
Stream.ReadBuffer(Buffer,1);