Skip to content

Commit 9e228f9

Browse files
committed
add slz4
1 parent 1bb1b2a commit 9e228f9

File tree

3 files changed

+132
-85
lines changed

3 files changed

+132
-85
lines changed

README.md

+74-41
Original file line numberDiff line numberDiff line change
@@ -3,58 +3,91 @@ An implementation of adaptive range coder for C++
33

44
# Benchmarks
55
## System
6-
Windows 10 Pro Ver.1803(Build 17134.228) 64bit
7-
Visual Studio 2017 Ver 15.5.2
8-
CPU: Intel Core i7-6700T
9-
Memory: 16GB (DDR4)
6+
Windows 10 Pro Ver.2004(Build 19041.508) 64bit
7+
Visual Studio Professional 2019 Version 16.7.5
8+
CPU: Intel Core i7-8700
9+
Memory: 32 GB (DDR4)
1010

1111
## Test data
1212
- [The Canterbury Corpus](http://corpus.canterbury.ac.nz/index.html)
13+
1314
## Result table
15+
16+
### Range Coder
17+
18+
|Name|Ratio|Compression (micro seconds)|Decompression (micro seconds)|
19+
|:---|:---|:---|:---|
20+
|alice29.txt|0.574532|1674|5964|
21+
|asyoulik.txt|0.605293|1416|4944|
22+
|cp.html|0.674836|276|957|
23+
|fields.c|0.672646|129|437|
24+
|grammar.lsp|0.718893|43|147|
25+
|kennedy.xls|0.452938|10656|39724|
26+
|lcet10.txt|0.585129|4698|16673|
27+
|plrabn12.txt|0.567788|5370|18808|
28+
|ptt5|0.157010|4946|19490|
29+
|sum|0.679759|481|1533|
30+
|xargs.1|0.735510|48|167|
31+
1432
### Adaptive Range Coder
15-
|Name|Ratio|Compression (ns)|Decompression (ns)|
33+
34+
|Name|Ratio|Compression (micro seconds)|Decompression (micro seconds)|
35+
|:---|:---|:---|:---|
36+
|alice29.txt|0.573000|3224|4966|
37+
|asyoulik.txt|0.603400|2716|4360|
38+
|cp.html|0.662480|569|857|
39+
|fields.c|0.642511|244|367|
40+
|grammar.lsp|0.619457|83|118|
41+
|kennedy.xls|0.447426|18369|22158|
42+
|lcet10.txt|0.584625|9151|14856|
43+
|plrabn12.txt|0.567367|11152|16143|
44+
|ptt5|0.152158|7112|9837|
45+
|sum|0.670450|814|1164|
46+
|xargs.1|0.648924|98|149|
47+
48+
### SLZ4
49+
|Name|Ratio|Compression (micro seconds)|Decompression (micro seconds)|
1650
|:---|:---|:---|:---|
17-
|alice29.txt|0.573000|5587|7150|
18-
|asyoulik.txt|0.603400|3769|5472|
19-
|cp.html|0.662480|785|1314|
20-
|fields.c|0.642511|476|582|
21-
|grammar.lsp|0.619457|110|168|
22-
|kennedy.xls|0.447426|25035|29346|
23-
|lcet10.txt|0.584625|13651|18465|
24-
|plrabn12.txt|0.567367|18652|21234|
25-
|ptt5|0.152158|10315|13233|
26-
|sum|0.670450|1104|1530|
27-
|xargs.1|0.648924|129|188|
51+
|alice29.txt|0.590510|1202|406|
52+
|asyoulik.txt|0.623499|1016|338|
53+
|cp.html|0.494452|184|45|
54+
|fields.c|0.479283|73|22|
55+
|grammar.lsp|0.527546|30|6|
56+
|kennedy.xls|0.362974|4801|1208|
57+
|lcet10.txt|0.552485|3124|1078|
58+
|plrabn12.txt|0.654747|4174|1360|
59+
|sum|0.501464|288|67|
60+
|xargs.1|0.637568|40|8|
2861

29-
### zlib
30-
|Name|Ratio|Compression (ns)|Decompression (ns)|
62+
### ZLib
63+
|Name|Ratio|Compression (micro seconds)|Decompression (micro seconds)|
3164
|:---|:---|:---|:---|
32-
|alice29.txt|0.357712|7935|8979|
33-
|asyoulik.txt|0.390617|6997|7856|
34-
|cp.html|0.323578|708|871|
35-
|fields.c|0.280000|324|401|
36-
|grammar.lsp|0.328406|147|183|
37-
|kennedy.xls|0.198100|33404|39149|
38-
|lcet10.txt|0.339549|21247|23925|
39-
|plrabn12.txt|0.405223|32987|36529|
40-
|ptt5|0.110022|9962|11931|
41-
|sum|0.339697|1669|1940|
42-
|xargs.1|0.410693|167|209|
65+
|alice29.txt|0.357712|12499|1598|
66+
|asyoulik.txt|0.390617|11068|1325|
67+
|cp.html|0.323578|1094|262|
68+
|fields.c|0.280000|734|124|
69+
|grammar.lsp|0.328406|241|54|
70+
|kennedy.xls|0.198100|62349|8008|
71+
|lcet10.txt|0.339549|39025|4607|
72+
|plrabn12.txt|0.405223|55098|5270|
73+
|ptt5|0.110022|20496|3429|
74+
|sum|0.339697|2839|389|
75+
|xargs.1|0.410693|245|65|
4376

4477
### LZ4
45-
|Name|Ratio|Compression (ns)|Decompression (ns)|
78+
|Name|Ratio|Compression (micro seconds)|Decompression (micro seconds)|
4679
|:---|:---|:---|:---|
47-
|alice29.txt|0.583205|477|564|
48-
|asyoulik.txt|0.636313|365|433|
49-
|cp.html|0.483884|61|76|
50-
|fields.c|0.467713|33|44|
51-
|grammar.lsp|0.513840|12|14|
52-
|kennedy.xls|0.363881|1934|2454|
53-
|lcet10.txt|0.546481|1235|1466|
54-
|plrabn12.txt|0.675691|1457|1715|
55-
|ptt5|0.169295|513|780|
56-
|sum|0.491946|88|112|
57-
|xargs.1|0.628815|13|15|
80+
|alice29.txt|0.583205|509|189|
81+
|asyoulik.txt|0.636313|447|102|
82+
|cp.html|0.483884|50|16|
83+
|fields.c|0.467713|21|7|
84+
|grammar.lsp|0.513840|7|1|
85+
|kennedy.xls|0.363881|1371|532|
86+
|lcet10.txt|0.546481|1034|345|
87+
|plrabn12.txt|0.675691|1280|271|
88+
|ptt5|0.169295|411|306|
89+
|sum|0.491946|113|22|
90+
|xargs.1|0.628815|10|1|
5891

5992
# Building
6093
First, make sure you clone this repository.

test/main.cpp

+1-4
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ int def_slz4(cpprcoder::MemoryStream& outStream, cpprcoder::u32 srcSize, cpprcod
5858
{
5959
slz4::SLZ4Context context;
6060
cpprcoder::s32 result = slz4::compress(context, outStream.capacity(), reinterpret_cast<slz4::u8*>(&outStream[0]), srcSize, reinterpret_cast<slz4::u8*>(src));
61-
6261
if(0 <= result) {
6362
outStream.resize(result);
6463
}
@@ -182,9 +181,6 @@ int def_lz4(cpprcoder::MemoryStream& outStream, cpprcoder::u32 srcSize, cpprcode
182181
int inf_lz4(cpprcoder::MemoryStream& outStream, cpprcoder::u32 srcSize, cpprcoder::u8* src)
183182
{
184183
cpprcoder::s32 result = LZ4_decompress_safe(reinterpret_cast<const char*>(src), reinterpret_cast<char*>(&outStream[0]), srcSize, outStream.size());
185-
if(0 <= result) {
186-
outStream.resize(result);
187-
}
188184
return result;
189185
}
190186
#endif
@@ -339,6 +335,7 @@ void run_slz4(const char* filepath)
339335

340336
for(size_t i=0; i<decstream.size(); ++i){
341337
if(src[i] != decstream[i]){
338+
printf("Error: %d != %d\n", src[i], decstream[i]);
342339
return;
343340
}
344341
}

test/slz4.h

+57-40
Original file line numberDiff line numberDiff line change
@@ -134,27 +134,27 @@ struct LZSSMatch
134134
};
135135

136136
/**
137-
@return The number of bytes written into 'dst' or a negative value if this function fails.
138-
@param capacity ... Byte size of a destination buffer 'dst'. This is at least more than the size which is returned by the compressBound.
139-
@param dst ... A destination buffer.
140-
@param size ... Byte size of a source data. The maximum supported value for this is MAX_BLOCK_SIZE.
141-
@param src ... A source data.
142-
*/
137+
@return The number of bytes written into 'dst' or a negative value if this function fails.
138+
@param capacity ... Byte size of a destination buffer 'dst'. This is at least more than the size which is returned by the compressBound.
139+
@param dst ... A destination buffer.
140+
@param size ... Byte size of a source data. The maximum supported value for this is MAX_BLOCK_SIZE.
141+
@param src ... A source data.
142+
*/
143143
s32 compress(SLZ4Context& context, u32 capacity, u8* dst, u32 size, const u8* src);
144144

145145
/**
146-
@return The maximum number of bytes that compression may write into a destination buffer.
147-
@param size ... Byte size of a source data. The maximum supported value for this is MAX_BLOCK_SIZE.
148-
*/
146+
@return The maximum number of bytes that compression may write into a destination buffer.
147+
@param size ... Byte size of a source data. The maximum supported value for this is MAX_BLOCK_SIZE.
148+
*/
149149
s32 compressBound(u32 size);
150150

151151
/**
152-
@return The number of bytes written into 'dst' or a negative value if this function fails.
153-
@param capacity ... Byte size of a destination buffer 'dst'. The maximum supported value for this is MAX_BLOCK_SIZE.
154-
@param dst ... A destination buffer.
155-
@param size ... Byte size of a compressed data 'src'.
156-
@param src ... A source compressed data.
157-
*/
152+
@return The number of bytes written into 'dst' or a negative value if this function fails.
153+
@param capacity ... Byte size of a destination buffer 'dst'. The maximum supported value for this is MAX_BLOCK_SIZE.
154+
@param dst ... A destination buffer.
155+
@param size ... Byte size of a compressed data 'src'.
156+
@param src ... A source compressed data.
157+
*/
158158
s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src);
159159

160160
} //namespace slz4
@@ -165,8 +165,8 @@ s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src);
165165
#include <immintrin.h>
166166

167167
#define XXH_INLINE_ALL
168-
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
169-
#define XXH_IMPLEMENTATION /* access definitions */
168+
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
169+
#define XXH_IMPLEMENTATION /* access definitions */
170170
#include <xxhash.h>
171171

172172
namespace slz4
@@ -187,7 +187,7 @@ namespace
187187

188188
u32 push(u32 code, u32 u)
189189
{
190-
return (code >> 8) | (u<<24);
190+
return (code >> 8) | (u << 24);
191191
}
192192

193193
u32 hash(u32 code)
@@ -365,27 +365,42 @@ namespace
365365
}
366366

367367
//-------------------------------------------------------------------
368-
void copy(u8* dst, const u8* src, u32 size)
368+
void set(u8* dst, s32 value, u32 size)
369369
{
370-
u32 s = size>>6;
370+
u32 s = size>>7;
371+
SLZ4_ASSERT(size == (s<<7));
372+
__m256i x = _mm256_set1_epi32(value);
371373
for(u32 i=0; i<s; ++i){
374+
_mm256_store_si256(reinterpret_cast<__m256i*>(dst), x);
375+
_mm256_store_si256(reinterpret_cast<__m256i*>(dst+32), x);
376+
_mm256_store_si256(reinterpret_cast<__m256i*>(dst+64), x);
377+
_mm256_store_si256(reinterpret_cast<__m256i*>(dst+96), x);
378+
dst += 128;
379+
}
380+
}
381+
382+
//-------------------------------------------------------------------
383+
void copy(u8* dst, const u8* src, u32 size)
384+
{
385+
u32 s = size >> 6;
386+
for(u32 i = 0; i < s; ++i) {
372387
_mm_storeu_ps(reinterpret_cast<f32*>(dst), _mm_loadu_ps(reinterpret_cast<const f32*>(src)));
373-
_mm_storeu_ps(reinterpret_cast<f32*>(dst+16), _mm_loadu_ps(reinterpret_cast<const f32*>(src+16)));
374-
_mm_storeu_ps(reinterpret_cast<f32*>(dst+32), _mm_loadu_ps(reinterpret_cast<const f32*>(src+32)));
375-
_mm_storeu_ps(reinterpret_cast<f32*>(dst+48), _mm_loadu_ps(reinterpret_cast<const f32*>(src+48)));
388+
_mm_storeu_ps(reinterpret_cast<f32*>(dst + 16), _mm_loadu_ps(reinterpret_cast<const f32*>(src + 16)));
389+
_mm_storeu_ps(reinterpret_cast<f32*>(dst + 32), _mm_loadu_ps(reinterpret_cast<const f32*>(src + 32)));
390+
_mm_storeu_ps(reinterpret_cast<f32*>(dst + 48), _mm_loadu_ps(reinterpret_cast<const f32*>(src + 48)));
376391
dst += 64;
377392
src += 64;
378393
}
379-
size -= (s<<6);
380-
s = size>>4;
381-
for(u32 i=0; i<s; ++i){
394+
size -= (s << 6);
395+
s = size >> 4;
396+
for(u32 i = 0; i < s; ++i) {
382397
_mm_storeu_ps(reinterpret_cast<f32*>(dst), _mm_loadu_ps(reinterpret_cast<const f32*>(src)));
383398
dst += 16;
384399
src += 16;
385400
}
386401

387-
s = size - (s<<4);
388-
for(u32 i=0; i<s; ++i){
402+
s = size - (s << 4);
403+
for(u32 i = 0; i < s; ++i) {
389404
dst[i] = src[i];
390405
}
391406
}
@@ -406,7 +421,8 @@ s32 compress(SLZ4Context& context, u32 capacity, u8* dst, u32 size, const u8* sr
406421
: -1;
407422
}
408423

409-
memset(context.entries_, -1, sizeof(s32) * DICTIONARY_SIZE);
424+
//memset(context.entries_, -1, sizeof(s32) * DICTIONARY_SIZE);
425+
set(reinterpret_cast<u8*>(context.entries_), -1, sizeof(s32)*DICTIONARY_SIZE);
410426
{ //Add the first code to our dictionary
411427
u32 code = pack(src);
412428
u32 index = hash(code) & (DICTIONARY_SIZE - 1);
@@ -418,7 +434,7 @@ s32 compress(SLZ4Context& context, u32 capacity, u8* dst, u32 size, const u8* sr
418434
u32 pending = 0;
419435
u32 position = 4;
420436
while(position < endMatch) {
421-
u32 code = pack(src+position);
437+
u32 code = pack(src + position);
422438
LZSSMatch match = findLongestMatch(context, code, src, position, end);
423439

424440
if(MIN_MATCH_LENGTH <= match.length_) {
@@ -454,7 +470,7 @@ s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src)
454470
{
455471
SLZ4_ASSERT(0 <= size);
456472
SLZ4_ASSERT(0 <= capacity && capacity <= MAX_BLOCK_SIZE);
457-
if(MAX_BLOCK_SIZE<capacity){
473+
if(MAX_BLOCK_SIZE < capacity) {
458474
return -1;
459475
}
460476
const u8* current = src;
@@ -464,10 +480,11 @@ s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src)
464480
u8* d = dst;
465481

466482
for(;;) {
467-
//Decode token
468483
if(end0 <= current) {
469-
return -1;
484+
break;
470485
}
486+
487+
//Decode token
471488
u32 literalLength = (current[0] >> 4);
472489
u32 matchLength = (current[0]) & 0xFU;
473490
++current;
@@ -478,13 +495,12 @@ s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src)
478495
}
479496

480497
//Read literals
481-
if(end0<(current + literalLength)){
498+
if(end0 < (current + literalLength)) {
482499
return -1;
483500
}
484-
if(dend<(d + literalLength)){
501+
if(dend < (d + literalLength)) {
485502
return -1;
486503
}
487-
//memcpy(d, current, literalLength);
488504
copy(d, current, literalLength);
489505
d += literalLength;
490506
current += literalLength;
@@ -503,15 +519,15 @@ s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src)
503519
}
504520

505521
//Copy match
506-
if(static_cast<s32>(d-dst) < offset) {
522+
if(static_cast<s32>(d - dst) < offset) {
507523
return -1;
508524
}
509525
matchLength += MIN_MATCH_LENGTH;
510-
if(dend<(d + matchLength)){
526+
if(dend < (d + matchLength)) {
511527
return -1;
512528
}
513529
if(16 <= offset) {
514-
copy(d, d-offset, matchLength);
530+
copy(d, d - offset, matchLength);
515531
d += matchLength;
516532
} else {
517533
while(0 < matchLength) {
@@ -521,7 +537,8 @@ s32 decompress(u32 capacity, u8* dst, u32 size, const u8* src)
521537
}
522538
}
523539
}
524-
return static_cast<s32>(d-dst);
540+
return static_cast<s32>(d - dst);
525541
}
542+
526543
} //namespace slz4
527544
#endif //SLZ4_IMPLEMENTATION

0 commit comments

Comments
 (0)