diff --git a/lzfse/lzfse.h b/lzfse/lzfse.h index 75a14c4..573c85b 100644 --- a/lzfse/lzfse.h +++ b/lzfse/lzfse.h @@ -1,23 +1,8 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ #ifndef LZFSE_H #define LZFSE_H @@ -25,33 +10,6 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSI #include <stddef.h> #include <stdint.h> -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) && !defined(__clang__) -# define __attribute__(X) -# pragma warning(disable : 4068) -#endif - -#if defined(LZFSE_DLL) -# if defined(_WIN32) || defined(__CYGWIN__) -# if defined(LZFSE_DLL_EXPORTS) -# define LZFSE_API __declspec(dllexport) -# else -# define LZFSE_API __declspec(dllimport) -# endif -# endif -#endif - -#if !defined(LZFSE_API) -# if __GNUC__ >= 4 -# define LZFSE_API __attribute__((visibility("default"))) -# else -# define LZFSE_API -# endif -#endif - /*! @abstract Get the required scratch buffer size to compress using LZFSE. */ size_t lzfse_encode_scratch_size(void); @@ -84,11 +42,9 @@ size_t lzfse_encode_scratch_size(void); * successfully compressed. If the input cannot be compressed to fit into * the provided buffer, or an error occurs, zero is returned, and the * contents of dst_buffer are unspecified. */ -size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, - size_t dst_size, - const uint8_t *__restrict src_buffer, - size_t src_size, - void *__restrict scratch_buffer); +size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, + const uint8_t *__restrict src_buffer, + size_t src_size, void *__restrict scratch_buffer); /*! @abstract Get the required scratch buffer size to decompress using LZFSE. */ size_t lzfse_decode_scratch_size(void); @@ -123,14 +79,8 @@ size_t lzfse_decode_scratch_size(void); * buffer to hold the entire expanded output, only the first dst_size bytes * will be written to the buffer and dst_size is returned. Note that this * behavior differs from that of lzfse_encode_buffer.
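 * For example, a caller that manages its own scratch memory might use this
 * entry point as in the following sketch (illustrative only, not part of
 * this header; the kmalloc/kfree calls and the dst/src variables assume the
 * kernel context of this port):
 *
 *   void *scratch = kmalloc(lzfse_decode_scratch_size(), GFP_KERNEL);
 *   size_t n = 0;
 *   if (scratch) {
 *           n = lzfse_decode_buffer(dst, dst_size, src, src_size, scratch);
 *           kfree(scratch);
 *   }
 *   // n == dst_size may mean the output was truncated, as described above.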
*/ -size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, - size_t dst_size, - const uint8_t *__restrict src_buffer, - size_t src_size, - void *__restrict scratch_buffer); - -#ifdef __cplusplus -} /* extern "C" */ -#endif +size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, + const uint8_t *__restrict src_buffer, + size_t src_size, void *__restrict scratch_buffer); #endif /* LZFSE_H */ diff --git a/lzfse/lzfse_decode.c b/lzfse/lzfse_decode.c index c3b4f37..ef7d348 100644 --- a/lzfse/lzfse_decode.c +++ b/lzfse/lzfse_decode.c @@ -1,74 +1,66 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -// LZFSE decode API +/* LZFSE decode API + */ #include #include "lzfse.h" #include "lzfse_internal.h" -size_t lzfse_decode_scratch_size(void) { return sizeof(lzfse_decoder_state); } +size_t lzfse_decode_scratch_size(void) +{ + return sizeof(lzfse_decoder_state); +} -static size_t lzfse_decode_buffer_with_scratch(uint8_t *__restrict dst_buffer, - size_t dst_size, const uint8_t *__restrict src_buffer, - size_t src_size, void *__restrict scratch_buffer) { - int status; - lzfse_decoder_state *s = (lzfse_decoder_state *)scratch_buffer; - memset(s, 0x00, sizeof(*s)); +static size_t lzfse_decode_buffer_with_scratch( + uint8_t *__restrict dst_buffer, size_t dst_size, + const uint8_t *__restrict src_buffer, size_t src_size, + void *__restrict scratch_buffer) +{ + int status; + lzfse_decoder_state *s = (lzfse_decoder_state *)scratch_buffer; + memset(s, 0x00, sizeof(*s)); - // Initialize state - s->src = src_buffer; - s->src_begin = src_buffer; - s->src_end = s->src + src_size; - s->dst = dst_buffer; - s->dst_begin = dst_buffer; - s->dst_end = dst_buffer + dst_size; + // Initialize state + s->src = src_buffer; + s->src_begin = src_buffer; + s->src_end = s->src + src_size; + s->dst = dst_buffer; + s->dst_begin = dst_buffer; + s->dst_end = dst_buffer + dst_size; - // Decode - status = lzfse_decode(s); - if (status == LZFSE_STATUS_DST_FULL) - return dst_size; - if (status != LZFSE_STATUS_OK) - return 0; // failed - return (size_t)(s->dst - dst_buffer); // bytes written + // Decode + status = lzfse_decode(s); + if (status == LZFSE_STATUS_DST_FULL) + return dst_size; + if (status != LZFSE_STATUS_OK) + return 0; // failed + return (size_t)(s->dst - dst_buffer); // bytes written } size_t lzfse_decode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, - const uint8_t *__restrict src_buffer, - size_t src_size, void *__restrict scratch_buffer) { - int has_malloc = 0; - size_t ret = 0; + const uint8_t *__restrict src_buffer, + size_t src_size, void *__restrict scratch_buffer) +{ + int has_malloc = 0; + size_t ret = 0; - // Deal with the possible NULL pointer - if (scratch_buffer == NULL) { - // +1 in case scratch size could be zero - scratch_buffer = kmalloc(lzfse_decode_scratch_size() + 1, GFP_KERNEL); - has_malloc = 1; - } - if (scratch_buffer == NULL) - return 0; - ret = lzfse_decode_buffer_with_scratch(dst_buffer, - dst_size, src_buffer, - src_size, scratch_buffer); - if (has_malloc) - kfree(scratch_buffer); - return ret; + // Deal with the possible NULL pointer + if (scratch_buffer == NULL) { + // +1 in case scratch size could be zero + scratch_buffer = + kmalloc(lzfse_decode_scratch_size() + 1, GFP_KERNEL); + has_malloc = 1; + } + if (scratch_buffer == NULL) + return 0; + ret = lzfse_decode_buffer_with_scratch(dst_buffer, dst_size, src_buffer, + src_size, scratch_buffer); + if (has_malloc) + kfree(scratch_buffer); + return ret; } diff --git a/lzfse/lzfse_decode_base.c b/lzfse/lzfse_decode_base.c index 3f3bfe3..5fb9e37 100644 --- a/lzfse/lzfse_decode_base.c +++ b/lzfse/lzfse_decode_base.c @@ -1,23 +1,8 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ #include "lzfse_internal.h" #include "lzvn_decode_base.h" @@ -25,26 +10,29 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSI /*! @abstract Decode an entry value from next bits of stream. * Return \p value, and set \p *nbits to the number of bits to consume * (starting with LSB). */ -static inline int lzfse_decode_v1_freq_value(uint32_t bits, int *nbits) { - static const int8_t lzfse_freq_nbits_table[32] = { - 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14, - 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14}; - static const int8_t lzfse_freq_value_table[32] = { - 0, 2, 1, 4, 0, 3, 1, -1, 0, 2, 1, 5, 0, 3, 1, -1, - 0, 2, 1, 6, 0, 3, 1, -1, 0, 2, 1, 7, 0, 3, 1, -1}; - - uint32_t b = bits & 31; // lower 5 bits - int n = lzfse_freq_nbits_table[b]; - *nbits = n; - - // Special cases for > 5 bits encoding - if (n == 8) - return 8 + ((bits >> 4) & 0xf); - if (n == 14) - return 24 + ((bits >> 4) & 0x3ff); - - // <= 5 bits encoding from table - return lzfse_freq_value_table[b]; +static inline int lzfse_decode_v1_freq_value(uint32_t bits, int *nbits) +{ + static const int8_t lzfse_freq_nbits_table[32] = { + 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14, + 2, 3, 2, 5, 2, 3, 2, 8, 2, 3, 2, 5, 2, 3, 2, 14 + }; + static const int8_t lzfse_freq_value_table[32] = { + 0, 2, 1, 4, 0, 3, 1, -1, 0, 2, 1, 5, 0, 3, 1, -1, + 0, 2, 1, 6, 0, 3, 1, -1, 0, 2, 1, 7, 0, 3, 1, -1 + }; + + uint32_t b = bits & 31; // lower 5 bits + int n = lzfse_freq_nbits_table[b]; + *nbits = n; + + // Special cases for > 5 bits encoding + if (n == 8) + return 8 + ((bits >> 4) & 0xf); + if (n == 14) + return 24 + ((bits >> 4) & 0x3ff); + + // <= 5 bits encoding from table + return lzfse_freq_value_table[b]; } /*! @abstract Extracts up to 32 bits from a 64-bit field beginning at @@ -52,16 +40,18 @@ static inline int lzfse_decode_v1_freq_value(uint32_t bits, int *nbits) { * * If we number the bits of \p v from 0 (least significant) to 63 (most * significant), the result is bits \p offset to \p offset+nbits-1. 
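 *
 * For example, with v = 0x0123456789abcdefULL (an illustrative value, not
 * taken from the patch): get_field(v, 0, 4) == 0xf (bits 0..3),
 * get_field(v, 4, 8) == 0xde (bits 4..11), and
 * get_field(v, 32, 20) == 0x34567 (bits 32..51).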
*/ -static inline uint32_t get_field(uint64_t v, int offset, int nbits) { - if (nbits == 32) - return (uint32_t)(v >> offset); - return (uint32_t)((v >> offset) & ((1 << nbits) - 1)); +static inline uint32_t get_field(uint64_t v, int offset, int nbits) +{ + if (nbits == 32) + return (uint32_t)(v >> offset); + return (uint32_t)((v >> offset) & ((1 << nbits) - 1)); } /*! @abstract Return \c header_size field from a \c lzfse_compressed_block_header_v2. */ static inline uint32_t -lzfse_decode_v2_header_size(const lzfse_compressed_block_header_v2 *in) { - return get_field(in->packed_fields[2], 0, 32); +lzfse_decode_v2_header_size(const lzfse_compressed_block_header_v2 *in) +{ + return get_field(in->packed_fields[2], 0, 32); } /*! @abstract Decode all fields from a \c lzfse_compressed_block_header_v2 to a @@ -69,584 +59,667 @@ lzfse_decode_v2_header_size(const lzfse_compressed_block_header_v2 *in) { * @return 0 on success. * @return -1 on failure. */ static inline int lzfse_decode_v1(lzfse_compressed_block_header_v1 *out, - const lzfse_compressed_block_header_v2 *in) { - uint64_t v0; - uint64_t v1; - uint64_t v2; - uint16_t *dst = NULL; - const uint8_t *src = NULL; - const uint8_t *src_end = NULL; - uint32_t accum = 0; - int accum_nbits = 0; - int nbits = 0; - int i; - - // Clear all fields - memset(out, 0x00, sizeof(lzfse_compressed_block_header_v1)); - - v0 = in->packed_fields[0]; - v1 = in->packed_fields[1]; - v2 = in->packed_fields[2]; - - out->magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; - out->n_raw_bytes = in->n_raw_bytes; - - // Literal state - out->n_literals = get_field(v0, 0, 20); - out->n_literal_payload_bytes = get_field(v0, 20, 20); - out->literal_bits = (int)get_field(v0, 60, 3) - 7; - out->literal_state[0] = get_field(v1, 0, 10); - out->literal_state[1] = get_field(v1, 10, 10); - out->literal_state[2] = get_field(v1, 20, 10); - out->literal_state[3] = get_field(v1, 30, 10); - - // L,M,D state - out->n_matches = get_field(v0, 40, 20); - out->n_lmd_payload_bytes = get_field(v1, 40, 20); - out->lmd_bits = (int)get_field(v1, 60, 3) - 7; - out->l_state = get_field(v2, 32, 10); - out->m_state = get_field(v2, 42, 10); - out->d_state = get_field(v2, 52, 10); - - // Total payload size - out->n_payload_bytes = - out->n_literal_payload_bytes + out->n_lmd_payload_bytes; - - // Freq tables - dst = &(out->l_freq[0]); - src = &(in->freq[0]); - src_end = - (const uint8_t *)in + get_field(v2, 0, 32); // first byte after header - accum = 0; - accum_nbits = 0; - - // No freq tables? 
- if (src_end == src) - return 0; // OK, freq tables were omitted - - for (i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + - LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; - i++) { - // Refill accum, one byte at a time, until we reach end of header, or accum - // is full - while (src < src_end && accum_nbits + 8 <= 32) { - accum |= (uint32_t)(*src) << accum_nbits; - accum_nbits += 8; - src++; - } - - // Decode and store value - nbits = 0; - dst[i] = lzfse_decode_v1_freq_value(accum, &nbits); - - if (nbits > accum_nbits) - return -1; // failed - - // Consume nbits bits - accum >>= nbits; - accum_nbits -= nbits; - } - - if (accum_nbits >= 8 || src != src_end) - return -1; // we need to end up exactly at the end of header, with less than - // 8 bits in accumulator - - return 0; + const lzfse_compressed_block_header_v2 *in) +{ + uint64_t v0; + uint64_t v1; + uint64_t v2; + uint16_t *dst = NULL; + const uint8_t *src = NULL; + const uint8_t *src_end = NULL; + uint32_t accum = 0; + int accum_nbits = 0; + int nbits = 0; + int i; + + // Clear all fields + memset(out, 0x00, sizeof(lzfse_compressed_block_header_v1)); + + v0 = in->packed_fields[0]; + v1 = in->packed_fields[1]; + v2 = in->packed_fields[2]; + + out->magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; + out->n_raw_bytes = in->n_raw_bytes; + + // Literal state + out->n_literals = get_field(v0, 0, 20); + out->n_literal_payload_bytes = get_field(v0, 20, 20); + out->literal_bits = (int)get_field(v0, 60, 3) - 7; + out->literal_state[0] = get_field(v1, 0, 10); + out->literal_state[1] = get_field(v1, 10, 10); + out->literal_state[2] = get_field(v1, 20, 10); + out->literal_state[3] = get_field(v1, 30, 10); + + // L,M,D state + out->n_matches = get_field(v0, 40, 20); + out->n_lmd_payload_bytes = get_field(v1, 40, 20); + out->lmd_bits = (int)get_field(v1, 60, 3) - 7; + out->l_state = get_field(v2, 32, 10); + out->m_state = get_field(v2, 42, 10); + out->d_state = get_field(v2, 52, 10); + + // Total payload size + out->n_payload_bytes = + out->n_literal_payload_bytes + out->n_lmd_payload_bytes; + + // Freq tables + dst = &(out->l_freq[0]); + src = &(in->freq[0]); + src_end = (const uint8_t *)in + + get_field(v2, 0, 32); // first byte after header + accum = 0; + accum_nbits = 0; + + // No freq tables? 
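/* Editorial note (not part of the patch): the loop below consumes the
 * variable-length code implemented by lzfse_decode_v1_freq_value() above.
 * Reading the accumulator LSB first:
 *   low 2 bits 00        -> 2-bit code,  value 0
 *   low 2 bits 10        -> 2-bit code,  value 1
 *   low 3 bits 001 / 101 -> 3-bit code,  value 2 / 3
 *   low 3 bits 011       -> 5-bit code,  value 4 + bits[3..4]
 *   low 4 bits 0111      -> 8-bit code,  value 8 + bits[4..7]
 *   low 4 bits 1111      -> 14-bit code, value 24 + bits[4..13]
 */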
+ if (src_end == src) + return 0; // OK, freq tables were omitted + + for (i = 0; + i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + + LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; + i++) { + // Refill accum, one byte at a time, until we reach end of header, or accum + // is full + while (src < src_end && accum_nbits + 8 <= 32) { + accum |= (uint32_t)(*src) << accum_nbits; + accum_nbits += 8; + src++; + } + + // Decode and store value + nbits = 0; + dst[i] = lzfse_decode_v1_freq_value(accum, &nbits); + + if (nbits > accum_nbits) + return -1; // failed + + // Consume nbits bits + accum >>= nbits; + accum_nbits -= nbits; + } + + if (accum_nbits >= 8 || src != src_end) + return -1; // we need to end up exactly at the end of header, with less than + // 8 bits in accumulator + + return 0; } -static inline void copy(uint8_t *dst, const uint8_t *src, size_t length) { - const uint8_t *dst_end = dst + length; - do { - copy8(dst, src); - dst += 8; - src += 8; - } while (dst < dst_end); +static inline void copy(uint8_t *dst, const uint8_t *src, size_t length) +{ + const uint8_t *dst_end = dst + length; + do { + copy8(dst, src); + dst += 8; + src += 8; + } while (dst < dst_end); } -static int lzfse_decode_lmd(lzfse_decoder_state *s) { - lzfse_compressed_block_decoder_state *bs = &(s->compressed_lzfse_block_state); - fse_state l_state = bs->l_state; - fse_state m_state = bs->m_state; - fse_state d_state = bs->d_state; - fse_in_stream in = bs->lmd_in_stream; - const uint8_t *src_start = s->src_begin; - const uint8_t *src = s->src + bs->lmd_in_buf; - const uint8_t *lit = bs->current_literal; - uint8_t *dst = s->dst; - uint32_t symbols = bs->n_matches; - int32_t L = bs->l_value; - int32_t M = bs->m_value; - int32_t D = bs->d_value; - int32_t new_d; - - // Number of bytes remaining in the destination buffer, minus 32 to - // provide a margin of safety for using overlarge copies on the fast path. - // This is a signed quantity, and may go negative when we are close to the - // end of the buffer. That's OK; we're careful about how we handle it - // in the slow-and-careful match execution path. - ptrdiff_t remaining_bytes = s->dst_end - dst - 32; - - // If L or M is non-zero, that means that we have already started decoding - // this block, and that we needed to interrupt decoding to get more space - // from the caller. There's a pending L, M, D triplet that we weren't - // able to completely process. Jump ahead to finish executing that symbol - // before decoding new values. - if (L || M) - goto ExecuteMatch; - - while (symbols > 0) { - int res; - // Decode the next L, M, D symbol from the input stream. - res = fse_in_flush(&in, &src, src_start); - if (res) { - return LZFSE_STATUS_ERROR; - } - L = fse_value_decode(&l_state, bs->l_decoder, &in); - if ((lit + L) >= (bs->literals + LZFSE_LITERALS_PER_BLOCK + 64)) { - return LZFSE_STATUS_ERROR; - } - res = fse_in_flush2(&in, &src, src_start); - if (res) { - return LZFSE_STATUS_ERROR; - } - M = fse_value_decode(&m_state, bs->m_decoder, &in); - res = fse_in_flush2(&in, &src, src_start); - if (res) { - return LZFSE_STATUS_ERROR; - } - new_d = fse_value_decode(&d_state, bs->d_decoder, &in); - D = new_d ? new_d : D; - symbols--; - - ExecuteMatch: - // Error if D is out of range, so that we avoid passing through - // uninitialized data or accesssing memory out of the destination - // buffer. 
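/* Editorial note (not part of the patch): with n_out = dst - s->dst_begin
 * bytes already written, the check below only accepts distances with
 * D <= n_out + L, i.e. the match source must lie inside output that will
 * exist once the literal is emitted. The cast to uint32_t also makes the
 * d_value == -1 sentinel (no distance decoded yet) fail the comparison. */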
- if ((uint32_t)D > dst + L - s->dst_begin) - return LZFSE_STATUS_ERROR; - - if (L + M <= remaining_bytes) { - size_t i; - // If we have plenty of space remaining, we can copy the literal - // and match with 16- and 32-byte operations, without worrying - // about writing off the end of the buffer. - remaining_bytes -= L + M; - copy(dst, lit, L); - dst += L; - lit += L; - // For the match, we have two paths; a fast copy by 16-bytes if - // the match distance is large enough to allow it, and a more - // careful path that applies a permutation to account for the - // possible overlap between source and destination if the distance - // is small. - if (D >= 8 || D >= M) - copy(dst, dst - D, M); - else - for (i = 0; i < M; i++) - dst[i] = dst[i - D]; - dst += M; - } - - else { - // Otherwise, we are very close to the end of the destination - // buffer, so we cannot use wide copies that slop off the end - // of the region that we are copying to. First, we restore - // the true length remaining, rather than the sham value we've - // been using so far. - remaining_bytes += 32; - // Now, we process the literal. Either there's space for it - // or there isn't; if there is, we copy the whole thing and - // update all the pointers and lengths to reflect the copy. - if (L <= remaining_bytes) { - size_t i; - for (i = 0; i < L; i++) - dst[i] = lit[i]; - dst += L; - lit += L; - remaining_bytes -= L; - L = 0; - } - // There isn't enough space to fit the whole literal. Copy as - // much of it as we can, update the pointers and the value of - // L, and report that the destination buffer is full. Note that - // we always write right up to the end of the destination buffer. - else { - size_t i; - for (i = 0; i < remaining_bytes; i++) - dst[i] = lit[i]; - dst += remaining_bytes; - lit += remaining_bytes; - L -= remaining_bytes; - goto DestinationBufferIsFull; - } - // The match goes just like the literal does. We copy as much as - // we can byte-by-byte, and if we reach the end of the buffer - // before finishing, we return to the caller indicating that - // the buffer is full. - if (M <= remaining_bytes) { - size_t i; - for (i = 0; i < M; i++) - dst[i] = dst[i - D]; - dst += M; - remaining_bytes -= M; - M = 0; - (void)M; // no dead store warning - // We don't need to update M = 0, because there's no partial - // symbol to continue executing. Either we're at the end of - // the block, in which case we will never need to resume with - // this state, or we're going to decode another L, M, D set, - // which will overwrite M anyway. - // - // But we still set M = 0, to maintain the post-condition. - } else { - size_t i; - for (i = 0; i < remaining_bytes; i++) - dst[i] = dst[i - D]; - dst += remaining_bytes; - M -= remaining_bytes; - DestinationBufferIsFull: - // Because we want to be able to resume decoding where we've left - // off (even in the middle of a literal or match), we need to - // update all of the block state fields with the current values - // so that we can resume execution from this point once the - // caller has given us more space to write into. - bs->l_value = L; - bs->m_value = M; - bs->d_value = D; - bs->l_state = l_state; - bs->m_state = m_state; - bs->d_state = d_state; - bs->lmd_in_stream = in; - bs->n_matches = symbols; - bs->lmd_in_buf = (uint32_t)(src - s->src); - bs->current_literal = lit; - s->dst = dst; - return LZFSE_STATUS_DST_FULL; - } - // Restore the "sham" decremented value of remaining_bytes and - // continue to the next L, M, D triple. 
We'll just be back in - // the careful path again, but this only happens at the very end - // of the buffer, so a little minor inefficiency here is a good - // tradeoff for simpler code. - remaining_bytes -= 32; - } - } - // Because we've finished with the whole block, we don't need to update - // any of the blockstate fields; they will not be used again. We just - // update the destination pointer in the state object and return. - s->dst = dst; - return LZFSE_STATUS_OK; +static int lzfse_decode_lmd(lzfse_decoder_state *s) +{ + lzfse_compressed_block_decoder_state *bs = + &(s->compressed_lzfse_block_state); + fse_state l_state = bs->l_state; + fse_state m_state = bs->m_state; + fse_state d_state = bs->d_state; + fse_in_stream in = bs->lmd_in_stream; + const uint8_t *src_start = s->src_begin; + const uint8_t *src = s->src + bs->lmd_in_buf; + const uint8_t *lit = bs->current_literal; + uint8_t *dst = s->dst; + uint32_t symbols = bs->n_matches; + int32_t L = bs->l_value; + int32_t M = bs->m_value; + int32_t D = bs->d_value; + int32_t new_d; + + // Number of bytes remaining in the destination buffer, minus 32 to + // provide a margin of safety for using overlarge copies on the fast path. + // This is a signed quantity, and may go negative when we are close to the + // end of the buffer. That's OK; we're careful about how we handle it + // in the slow-and-careful match execution path. + ptrdiff_t remaining_bytes = s->dst_end - dst - 32; + + // If L or M is non-zero, that means that we have already started decoding + // this block, and that we needed to interrupt decoding to get more space + // from the caller. There's a pending L, M, D triplet that we weren't + // able to completely process. Jump ahead to finish executing that symbol + // before decoding new values. + if (L || M) + goto ExecuteMatch; + + while (symbols > 0) { + int res; + // Decode the next L, M, D symbol from the input stream. + res = fse_in_flush(&in, &src, src_start); + if (res) { + return LZFSE_STATUS_ERROR; + } + L = fse_value_decode(&l_state, bs->l_decoder, &in); + if ((lit + L) >= + (bs->literals + LZFSE_LITERALS_PER_BLOCK + 64)) { + return LZFSE_STATUS_ERROR; + } + res = fse_in_flush2(&in, &src, src_start); + if (res) { + return LZFSE_STATUS_ERROR; + } + M = fse_value_decode(&m_state, bs->m_decoder, &in); + res = fse_in_flush2(&in, &src, src_start); + if (res) { + return LZFSE_STATUS_ERROR; + } + new_d = fse_value_decode(&d_state, bs->d_decoder, &in); + D = new_d ? new_d : D; + symbols--; + +ExecuteMatch: + // Error if D is out of range, so that we avoid passing through + // uninitialized data or accesssing memory out of the destination + // buffer. + if ((uint32_t)D > dst + L - s->dst_begin) + return LZFSE_STATUS_ERROR; + + if (L + M <= remaining_bytes) { + size_t i; + // If we have plenty of space remaining, we can copy the literal + // and match with 16- and 32-byte operations, without worrying + // about writing off the end of the buffer. + remaining_bytes -= L + M; + copy(dst, lit, L); + dst += L; + lit += L; + // For the match, we have two paths; a fast copy by 16-bytes if + // the match distance is large enough to allow it, and a more + // careful path that applies a permutation to account for the + // possible overlap between source and destination if the distance + // is small. 
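/* Editorial example (not part of the patch) of the overlapping case that
 * forces the byte-by-byte loop below: with D = 1 the match replicates the
 * previous byte into a run, which a single block copy of the source region
 * would not do.
 *
 *   uint8_t buf[8] = { 'A' };      // one byte already produced
 *   uint8_t *dst = buf + 1;
 *   int32_t D = 1, M = 4, i;
 *   for (i = 0; i < M; i++)
 *           dst[i] = dst[i - D];   // overlap-aware copy
 *   // buf now begins with five 'A' bytes
 */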
+ if (D >= 8 || D >= M) + copy(dst, dst - D, M); + else + for (i = 0; i < M; i++) + dst[i] = dst[i - D]; + dst += M; + } + + else { + // Otherwise, we are very close to the end of the destination + // buffer, so we cannot use wide copies that slop off the end + // of the region that we are copying to. First, we restore + // the true length remaining, rather than the sham value we've + // been using so far. + remaining_bytes += 32; + // Now, we process the literal. Either there's space for it + // or there isn't; if there is, we copy the whole thing and + // update all the pointers and lengths to reflect the copy. + if (L <= remaining_bytes) { + size_t i; + for (i = 0; i < L; i++) + dst[i] = lit[i]; + dst += L; + lit += L; + remaining_bytes -= L; + L = 0; + } + // There isn't enough space to fit the whole literal. Copy as + // much of it as we can, update the pointers and the value of + // L, and report that the destination buffer is full. Note that + // we always write right up to the end of the destination buffer. + else { + size_t i; + for (i = 0; i < remaining_bytes; i++) + dst[i] = lit[i]; + dst += remaining_bytes; + lit += remaining_bytes; + L -= remaining_bytes; + goto DestinationBufferIsFull; + } + // The match goes just like the literal does. We copy as much as + // we can byte-by-byte, and if we reach the end of the buffer + // before finishing, we return to the caller indicating that + // the buffer is full. + if (M <= remaining_bytes) { + size_t i; + for (i = 0; i < M; i++) + dst[i] = dst[i - D]; + dst += M; + remaining_bytes -= M; + M = 0; + (void)M; // no dead store warning + // We don't need to update M = 0, because there's no partial + // symbol to continue executing. Either we're at the end of + // the block, in which case we will never need to resume with + // this state, or we're going to decode another L, M, D set, + // which will overwrite M anyway. + // + // But we still set M = 0, to maintain the post-condition. + } else { + size_t i; + for (i = 0; i < remaining_bytes; i++) + dst[i] = dst[i - D]; + dst += remaining_bytes; + M -= remaining_bytes; +DestinationBufferIsFull: + // Because we want to be able to resume decoding where we've left + // off (even in the middle of a literal or match), we need to + // update all of the block state fields with the current values + // so that we can resume execution from this point once the + // caller has given us more space to write into. + bs->l_value = L; + bs->m_value = M; + bs->d_value = D; + bs->l_state = l_state; + bs->m_state = m_state; + bs->d_state = d_state; + bs->lmd_in_stream = in; + bs->n_matches = symbols; + bs->lmd_in_buf = (uint32_t)(src - s->src); + bs->current_literal = lit; + s->dst = dst; + return LZFSE_STATUS_DST_FULL; + } + // Restore the "sham" decremented value of remaining_bytes and + // continue to the next L, M, D triple. We'll just be back in + // the careful path again, but this only happens at the very end + // of the buffer, so a little minor inefficiency here is a good + // tradeoff for simpler code. + remaining_bytes -= 32; + } + } + // Because we've finished with the whole block, we don't need to update + // any of the blockstate fields; they will not be used again. We just + // update the destination pointer in the state object and return. + s->dst = dst; + return LZFSE_STATUS_OK; } -int lzfse_decode(lzfse_decoder_state *s) { - while (1) { - // Are we inside a block? 
- switch (s->block_magic) { - case LZFSE_NO_BLOCK_MAGIC: { - uint32_t magic; - // We need at least 4 bytes of magic number to identify next block - if (s->src + 4 > s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // SRC truncated - magic = load4(s->src); - - if (magic == LZFSE_ENDOFSTREAM_BLOCK_MAGIC) { - s->src += 4; - s->end_of_stream = 1; - return LZFSE_STATUS_OK; // done - } - - if (magic == LZFSE_UNCOMPRESSED_BLOCK_MAGIC) { - uncompressed_block_decoder_state *bs = NULL; - if (s->src + sizeof(uncompressed_block_header) > s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // SRC truncated - // Setup state for uncompressed block - bs = &(s->uncompressed_block_state); - bs->n_raw_bytes = - load4(s->src + offsetof(uncompressed_block_header, n_raw_bytes)); - s->src += sizeof(uncompressed_block_header); - s->block_magic = magic; - break; - } - - if (magic == LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC) { - lzvn_compressed_block_decoder_state *bs = NULL; - if (s->src + sizeof(lzvn_compressed_block_header) > s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // SRC truncated - // Setup state for compressed LZVN block - bs = &(s->compressed_lzvn_block_state); - bs->n_raw_bytes = - load4(s->src + offsetof(lzvn_compressed_block_header, n_raw_bytes)); - bs->n_payload_bytes = load4( - s->src + offsetof(lzvn_compressed_block_header, n_payload_bytes)); - bs->d_prev = 0; - s->src += sizeof(lzvn_compressed_block_header); - s->block_magic = magic; - break; - } - - if (magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC || - magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { - lzfse_compressed_block_header_v1 header1; - size_t header_size = 0; - lzfse_compressed_block_decoder_state *bs = NULL; - - // Decode compressed headers - if (magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { - const lzfse_compressed_block_header_v2 *header2; - int decodeStatus; - // Check we have the fixed part of the structure - if (s->src + offsetof(lzfse_compressed_block_header_v2, freq) > s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // SRC truncated - - // Get size, and check we have the entire structure - header2 = (const lzfse_compressed_block_header_v2 *)s->src; // not aligned, OK - header_size = lzfse_decode_v2_header_size(header2); - if (s->src + header_size > s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // SRC truncated - decodeStatus = lzfse_decode_v1(&header1, header2); - if (decodeStatus != 0) - return LZFSE_STATUS_ERROR; // failed - } else { - if (s->src + sizeof(lzfse_compressed_block_header_v1) > s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // SRC truncated - memcpy(&header1, s->src, sizeof(lzfse_compressed_block_header_v1)); - header_size = sizeof(lzfse_compressed_block_header_v1); - } - - // We require the header + entire encoded block to be present in SRC - // during the entire block decoding. - // This can be relaxed somehow, if it becomes a limiting factor, at the - // price of a more complex state maintenance. - // For DST, we can't easily require space for the entire decoded block, - // because it may expand to something very very large. 
- if (s->src + header_size + header1.n_literal_payload_bytes + - header1.n_lmd_payload_bytes > - s->src_end) - return LZFSE_STATUS_SRC_EMPTY; // need all encoded block - - // Sanity checks - if (lzfse_check_block_header_v1(&header1) != 0) { - return LZFSE_STATUS_ERROR; - } - - // Skip header - s->src += header_size; - - // Setup state for compressed V1 block from header - bs = &(s->compressed_lzfse_block_state); - bs->n_lmd_payload_bytes = header1.n_lmd_payload_bytes; - bs->n_matches = header1.n_matches; - fse_init_decoder_table(LZFSE_ENCODE_LITERAL_STATES, - LZFSE_ENCODE_LITERAL_SYMBOLS, - header1.literal_freq, bs->literal_decoder); - fse_init_value_decoder_table( - LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, header1.l_freq, - l_extra_bits, l_base_value, bs->l_decoder); - fse_init_value_decoder_table( - LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, header1.m_freq, - m_extra_bits, m_base_value, bs->m_decoder); - fse_init_value_decoder_table( - LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, header1.d_freq, - d_extra_bits, d_base_value, bs->d_decoder); - - // Decode literals - { - fse_in_stream in; - const uint8_t *buf_start = s->src_begin; - const uint8_t *buf; - fse_state state0; - fse_state state1; - fse_state state2; - fse_state state3; - uint32_t i; - - s->src += header1.n_literal_payload_bytes; // skip literal payload - buf = s->src; // read bits backwards from the end - if (fse_in_init(&in, header1.literal_bits, &buf, buf_start) != 0) - return LZFSE_STATUS_ERROR; - - state0 = header1.literal_state[0]; - state1 = header1.literal_state[1]; - state2 = header1.literal_state[2]; - state3 = header1.literal_state[3]; - - for (i = 0; i < header1.n_literals; i += 4) // n_literals is multiple of 4 - { +int lzfse_decode(lzfse_decoder_state *s) +{ + while (1) { + // Are we inside a block? 
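/* Editorial summary (not part of the patch): the dispatch below recognizes
 * these block magics, each read with load4() from lzfse_internal.h:
 *   LZFSE_ENDOFSTREAM_BLOCK_MAGIC    -> end of stream, decoding is done
 *   LZFSE_UNCOMPRESSED_BLOCK_MAGIC   -> raw copy of n_raw_bytes
 *   LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC -> LZVN-compressed block
 *   LZFSE_COMPRESSEDV1_BLOCK_MAGIC   -> LZFSE block, expanded v1 header
 *   LZFSE_COMPRESSEDV2_BLOCK_MAGIC   -> LZFSE block, packed v2 header
 *   anything else                    -> LZFSE_STATUS_ERROR
 */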
+ switch (s->block_magic) { + case LZFSE_NO_BLOCK_MAGIC: { + uint32_t magic; + // We need at least 4 bytes of magic number to identify next block + if (s->src + 4 > s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated + magic = load4(s->src); + + if (magic == LZFSE_ENDOFSTREAM_BLOCK_MAGIC) { + s->src += 4; + s->end_of_stream = 1; + return LZFSE_STATUS_OK; // done + } + + if (magic == LZFSE_UNCOMPRESSED_BLOCK_MAGIC) { + uncompressed_block_decoder_state *bs = NULL; + if (s->src + sizeof(uncompressed_block_header) > + s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated + // Setup state for uncompressed block + bs = &(s->uncompressed_block_state); + bs->n_raw_bytes = load4( + s->src + + offsetof(uncompressed_block_header, + n_raw_bytes)); + s->src += sizeof(uncompressed_block_header); + s->block_magic = magic; + break; + } + + if (magic == LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC) { + lzvn_compressed_block_decoder_state *bs = NULL; + if (s->src + + sizeof(lzvn_compressed_block_header) > + s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated + // Setup state for compressed LZVN block + bs = &(s->compressed_lzvn_block_state); + bs->n_raw_bytes = load4( + s->src + + offsetof(lzvn_compressed_block_header, + n_raw_bytes)); + bs->n_payload_bytes = load4( + s->src + + offsetof(lzvn_compressed_block_header, + n_payload_bytes)); + bs->d_prev = 0; + s->src += sizeof(lzvn_compressed_block_header); + s->block_magic = magic; + break; + } + + if (magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC || + magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { + lzfse_compressed_block_header_v1 header1; + size_t header_size = 0; + lzfse_compressed_block_decoder_state *bs = NULL; + + // Decode compressed headers + if (magic == LZFSE_COMPRESSEDV2_BLOCK_MAGIC) { + const lzfse_compressed_block_header_v2 + *header2; + int decodeStatus; + // Check we have the fixed part of the structure + if (s->src + + offsetof( + lzfse_compressed_block_header_v2, + freq) > + s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated + + // Get size, and check we have the entire structure + header2 = + (const lzfse_compressed_block_header_v2 + *)s + ->src; // not aligned, OK + header_size = + lzfse_decode_v2_header_size( + header2); + if (s->src + header_size > s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated + decodeStatus = lzfse_decode_v1(&header1, + header2); + if (decodeStatus != 0) + return LZFSE_STATUS_ERROR; // failed + } else { + if (s->src + + sizeof(lzfse_compressed_block_header_v1) > + s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // SRC truncated + memcpy(&header1, s->src, + sizeof(lzfse_compressed_block_header_v1)); + header_size = sizeof( + lzfse_compressed_block_header_v1); + } + + // We require the header + entire encoded block to be present in SRC + // during the entire block decoding. + // This can be relaxed somehow, if it becomes a limiting factor, at the + // price of a more complex state maintenance. + // For DST, we can't easily require space for the entire decoded block, + // because it may expand to something very very large. 
+ if (s->src + header_size + + header1.n_literal_payload_bytes + + header1.n_lmd_payload_bytes > + s->src_end) + return LZFSE_STATUS_SRC_EMPTY; // need all encoded block + + // Sanity checks + if (lzfse_check_block_header_v1(&header1) != + 0) { + return LZFSE_STATUS_ERROR; + } + + // Skip header + s->src += header_size; + + // Setup state for compressed V1 block from header + bs = &(s->compressed_lzfse_block_state); + bs->n_lmd_payload_bytes = + header1.n_lmd_payload_bytes; + bs->n_matches = header1.n_matches; + fse_init_decoder_table( + LZFSE_ENCODE_LITERAL_STATES, + LZFSE_ENCODE_LITERAL_SYMBOLS, + header1.literal_freq, + bs->literal_decoder); + fse_init_value_decoder_table( + LZFSE_ENCODE_L_STATES, + LZFSE_ENCODE_L_SYMBOLS, header1.l_freq, + l_extra_bits, l_base_value, + bs->l_decoder); + fse_init_value_decoder_table( + LZFSE_ENCODE_M_STATES, + LZFSE_ENCODE_M_SYMBOLS, header1.m_freq, + m_extra_bits, m_base_value, + bs->m_decoder); + fse_init_value_decoder_table( + LZFSE_ENCODE_D_STATES, + LZFSE_ENCODE_D_SYMBOLS, header1.d_freq, + d_extra_bits, d_base_value, + bs->d_decoder); + + // Decode literals + { + fse_in_stream in; + const uint8_t *buf_start = s->src_begin; + const uint8_t *buf; + fse_state state0; + fse_state state1; + fse_state state2; + fse_state state3; + uint32_t i; + + s->src += + header1.n_literal_payload_bytes; // skip literal payload + buf = s->src; // read bits backwards from the end + if (fse_in_init(&in, + header1.literal_bits, + &buf, buf_start) != 0) + return LZFSE_STATUS_ERROR; + + state0 = header1.literal_state[0]; + state1 = header1.literal_state[1]; + state2 = header1.literal_state[2]; + state3 = header1.literal_state[3]; + + for (i = 0; i < header1.n_literals; + i += + 4) // n_literals is multiple of 4 + { #if FSE_IOSTREAM_64 - if (fse_in_flush(&in, &buf, buf_start) != 0) - return LZFSE_STATUS_ERROR; // [57, 64] bits - bs->literals[i + 0] = - fse_decode(&state0, bs->literal_decoder, &in); // 10b max - bs->literals[i + 1] = - fse_decode(&state1, bs->literal_decoder, &in); // 10b max - bs->literals[i + 2] = - fse_decode(&state2, bs->literal_decoder, &in); // 10b max - bs->literals[i + 3] = - fse_decode(&state3, bs->literal_decoder, &in); // 10b max + if (fse_in_flush(&in, &buf, + buf_start) != + 0) + return LZFSE_STATUS_ERROR; // [57, 64] bits + bs->literals[i + 0] = fse_decode( + &state0, + bs->literal_decoder, + &in); // 10b max + bs->literals[i + 1] = fse_decode( + &state1, + bs->literal_decoder, + &in); // 10b max + bs->literals[i + 2] = fse_decode( + &state2, + bs->literal_decoder, + &in); // 10b max + bs->literals[i + 3] = fse_decode( + &state3, + bs->literal_decoder, + &in); // 10b max #else - if (fse_in_flush(&in, &buf, buf_start) != 0) - return LZFSE_STATUS_ERROR; // [25, 23] bits - bs->literals[i + 0] = - fse_decode(&state0, bs->literal_decoder, &in); // 10b max - bs->literals[i + 1] = - fse_decode(&state1, bs->literal_decoder, &in); // 10b max - if (fse_in_flush(&in, &buf, buf_start) != 0) - return LZFSE_STATUS_ERROR; // [25, 23] bits - bs->literals[i + 2] = - fse_decode(&state2, bs->literal_decoder, &in); // 10b max - bs->literals[i + 3] = - fse_decode(&state3, bs->literal_decoder, &in); // 10b max + if (fse_in_flush(&in, &buf, + buf_start) != + 0) + return LZFSE_STATUS_ERROR; // [25, 23] bits + bs->literals[i + 0] = fse_decode( + &state0, + bs->literal_decoder, + &in); // 10b max + bs->literals[i + 1] = fse_decode( + &state1, + bs->literal_decoder, + &in); // 10b max + if (fse_in_flush(&in, &buf, + buf_start) != + 0) + return LZFSE_STATUS_ERROR; // 
[25, 23] bits + bs->literals[i + 2] = fse_decode( + &state2, + bs->literal_decoder, + &in); // 10b max + bs->literals[i + 3] = fse_decode( + &state3, + bs->literal_decoder, + &in); // 10b max #endif - } - - bs->current_literal = bs->literals; - } // literals - - // SRC is not incremented to skip the LMD payload, since we need it - // during block decode. - // We will increment SRC at the end of the block only after this point. - - // Initialize the L,M,D decode stream, do not start decoding matches - // yet, and store decoder state - { - fse_in_stream in; - // read bits backwards from the end - const uint8_t *buf = s->src + header1.n_lmd_payload_bytes; - if (fse_in_init(&in, header1.lmd_bits, &buf, s->src) != 0) - return LZFSE_STATUS_ERROR; - - bs->l_state = header1.l_state; - bs->m_state = header1.m_state; - bs->d_state = header1.d_state; - bs->lmd_in_buf = (uint32_t)(buf - s->src); - bs->l_value = bs->m_value = 0; - // Initialize D to an illegal value so we can't erroneously use - // an uninitialized "previous" value. - bs->d_value = -1; - bs->lmd_in_stream = in; - } - - s->block_magic = magic; - break; - } - - // Here we have an invalid magic number - return LZFSE_STATUS_ERROR; - } // LZFSE_NO_BLOCK_MAGIC - - case LZFSE_UNCOMPRESSED_BLOCK_MAGIC: { - uncompressed_block_decoder_state *bs = &(s->uncompressed_block_state); - - // Compute the size (in bytes) of the data that we will actually copy. - // This size is minimum(bs->n_raw_bytes, space in src, space in dst). - - uint32_t copy_size = bs->n_raw_bytes; // bytes left to copy - size_t src_space, dst_space; - if (copy_size == 0) { - s->block_magic = 0; - break; - } // end of block - - if (s->src_end <= s->src) - return LZFSE_STATUS_SRC_EMPTY; // need more SRC data - src_space = s->src_end - s->src; - if (copy_size > src_space) - copy_size = (uint32_t)src_space; // limit to SRC data (> 0) - - if (s->dst_end <= s->dst) - return LZFSE_STATUS_DST_FULL; // need more DST capacity - dst_space = s->dst_end - s->dst; - if (copy_size > dst_space) - copy_size = (uint32_t)dst_space; // limit to DST capacity (> 0) - - // Now that we know that the copy size is bounded to the source and - // dest buffers, go ahead and copy the data. 
- // We always have copy_size > 0 here - memcpy(s->dst, s->src, copy_size); - s->src += copy_size; - s->dst += copy_size; - bs->n_raw_bytes -= copy_size; - - break; - } // LZFSE_UNCOMPRESSED_BLOCK_MAGIC - - case LZFSE_COMPRESSEDV1_BLOCK_MAGIC: - case LZFSE_COMPRESSEDV2_BLOCK_MAGIC: { - int status; - lzfse_compressed_block_decoder_state *bs = - &(s->compressed_lzfse_block_state); - // Require the entire LMD payload to be in SRC - if (s->src_end <= s->src || - bs->n_lmd_payload_bytes > (size_t)(s->src_end - s->src)) - return LZFSE_STATUS_SRC_EMPTY; - - status = lzfse_decode_lmd(s); - if (status != LZFSE_STATUS_OK) - return status; - - s->block_magic = LZFSE_NO_BLOCK_MAGIC; - s->src += bs->n_lmd_payload_bytes; // to next block - break; - } // LZFSE_COMPRESSEDV1_BLOCK_MAGIC || LZFSE_COMPRESSEDV2_BLOCK_MAGIC - - case LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC: { - lzvn_compressed_block_decoder_state *bs = - &(s->compressed_lzvn_block_state); - lzvn_decoder_state dstate; - size_t src_used, dst_used; - if (bs->n_payload_bytes > 0 && s->src_end <= s->src) - return LZFSE_STATUS_SRC_EMPTY; // need more SRC data - - // Init LZVN decoder state - memset(&dstate, 0x00, sizeof(dstate)); - dstate.src = s->src; - dstate.src_end = s->src_end; - if (dstate.src_end - s->src > bs->n_payload_bytes) - dstate.src_end = s->src + bs->n_payload_bytes; // limit to payload bytes - dstate.dst_begin = s->dst_begin; - dstate.dst = s->dst; - dstate.dst_end = s->dst_end; - if (dstate.dst_end - s->dst > bs->n_raw_bytes) - dstate.dst_end = s->dst + bs->n_raw_bytes; // limit to raw bytes - dstate.d_prev = bs->d_prev; - dstate.end_of_stream = 0; - - // Run LZVN decoder - lzvn_decode(&dstate); - - // Update our state - src_used = dstate.src - s->src; - dst_used = dstate.dst - s->dst; - if (src_used > bs->n_payload_bytes || dst_used > bs->n_raw_bytes) - return LZFSE_STATUS_ERROR; // sanity check - s->src = dstate.src; - s->dst = dstate.dst; - bs->n_payload_bytes -= (uint32_t)src_used; - bs->n_raw_bytes -= (uint32_t)dst_used; - bs->d_prev = (uint32_t)dstate.d_prev; - - // Test end of block - if (bs->n_payload_bytes == 0 && bs->n_raw_bytes == 0 && - dstate.end_of_stream) { - s->block_magic = 0; - break; - } // block done - - // Check for invalid state - if (bs->n_payload_bytes == 0 || bs->n_raw_bytes == 0 || - dstate.end_of_stream) - return LZFSE_STATUS_ERROR; - - // Here, block is not done and state is valid, so we need more space in dst. - return LZFSE_STATUS_DST_FULL; - } - - default: - return LZFSE_STATUS_ERROR; // invalid magic - - } // switch magic - - } // block loop - - return LZFSE_STATUS_OK; + } + + bs->current_literal = bs->literals; + } // literals + + // SRC is not incremented to skip the LMD payload, since we need it + // during block decode. + // We will increment SRC at the end of the block only after this point. + + // Initialize the L,M,D decode stream, do not start decoding matches + // yet, and store decoder state + { + fse_in_stream in; + // read bits backwards from the end + const uint8_t *buf = + s->src + + header1.n_lmd_payload_bytes; + if (fse_in_init(&in, header1.lmd_bits, + &buf, s->src) != 0) + return LZFSE_STATUS_ERROR; + + bs->l_state = header1.l_state; + bs->m_state = header1.m_state; + bs->d_state = header1.d_state; + bs->lmd_in_buf = + (uint32_t)(buf - s->src); + bs->l_value = bs->m_value = 0; + // Initialize D to an illegal value so we can't erroneously use + // an uninitialized "previous" value. 
+ bs->d_value = -1; + bs->lmd_in_stream = in; + } + + s->block_magic = magic; + break; + } + + // Here we have an invalid magic number + return LZFSE_STATUS_ERROR; + } // LZFSE_NO_BLOCK_MAGIC + + case LZFSE_UNCOMPRESSED_BLOCK_MAGIC: { + uncompressed_block_decoder_state *bs = + &(s->uncompressed_block_state); + + // Compute the size (in bytes) of the data that we will actually copy. + // This size is minimum(bs->n_raw_bytes, space in src, space in dst). + + uint32_t copy_size = + bs->n_raw_bytes; // bytes left to copy + size_t src_space, dst_space; + if (copy_size == 0) { + s->block_magic = 0; + break; + } // end of block + + if (s->src_end <= s->src) + return LZFSE_STATUS_SRC_EMPTY; // need more SRC data + src_space = s->src_end - s->src; + if (copy_size > src_space) + copy_size = (uint32_t) + src_space; // limit to SRC data (> 0) + + if (s->dst_end <= s->dst) + return LZFSE_STATUS_DST_FULL; // need more DST capacity + dst_space = s->dst_end - s->dst; + if (copy_size > dst_space) + copy_size = (uint32_t) + dst_space; // limit to DST capacity (> 0) + + // Now that we know that the copy size is bounded to the source and + // dest buffers, go ahead and copy the data. + // We always have copy_size > 0 here + memcpy(s->dst, s->src, copy_size); + s->src += copy_size; + s->dst += copy_size; + bs->n_raw_bytes -= copy_size; + + break; + } // LZFSE_UNCOMPRESSED_BLOCK_MAGIC + + case LZFSE_COMPRESSEDV1_BLOCK_MAGIC: + case LZFSE_COMPRESSEDV2_BLOCK_MAGIC: { + int status; + lzfse_compressed_block_decoder_state *bs = + &(s->compressed_lzfse_block_state); + // Require the entire LMD payload to be in SRC + if (s->src_end <= s->src || + bs->n_lmd_payload_bytes > + (size_t)(s->src_end - s->src)) + return LZFSE_STATUS_SRC_EMPTY; + + status = lzfse_decode_lmd(s); + if (status != LZFSE_STATUS_OK) + return status; + + s->block_magic = LZFSE_NO_BLOCK_MAGIC; + s->src += bs->n_lmd_payload_bytes; // to next block + break; + } // LZFSE_COMPRESSEDV1_BLOCK_MAGIC || LZFSE_COMPRESSEDV2_BLOCK_MAGIC + + case LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC: { + lzvn_compressed_block_decoder_state *bs = + &(s->compressed_lzvn_block_state); + lzvn_decoder_state dstate; + size_t src_used, dst_used; + if (bs->n_payload_bytes > 0 && s->src_end <= s->src) + return LZFSE_STATUS_SRC_EMPTY; // need more SRC data + + // Init LZVN decoder state + memset(&dstate, 0x00, sizeof(dstate)); + dstate.src = s->src; + dstate.src_end = s->src_end; + if (dstate.src_end - s->src > bs->n_payload_bytes) + dstate.src_end = + s->src + + bs->n_payload_bytes; // limit to payload bytes + dstate.dst_begin = s->dst_begin; + dstate.dst = s->dst; + dstate.dst_end = s->dst_end; + if (dstate.dst_end - s->dst > bs->n_raw_bytes) + dstate.dst_end = + s->dst + + bs->n_raw_bytes; // limit to raw bytes + dstate.d_prev = bs->d_prev; + dstate.end_of_stream = 0; + + // Run LZVN decoder + lzvn_decode(&dstate); + + // Update our state + src_used = dstate.src - s->src; + dst_used = dstate.dst - s->dst; + if (src_used > bs->n_payload_bytes || + dst_used > bs->n_raw_bytes) + return LZFSE_STATUS_ERROR; // sanity check + s->src = dstate.src; + s->dst = dstate.dst; + bs->n_payload_bytes -= (uint32_t)src_used; + bs->n_raw_bytes -= (uint32_t)dst_used; + bs->d_prev = (uint32_t)dstate.d_prev; + + // Test end of block + if (bs->n_payload_bytes == 0 && bs->n_raw_bytes == 0 && + dstate.end_of_stream) { + s->block_magic = 0; + break; + } // block done + + // Check for invalid state + if (bs->n_payload_bytes == 0 || bs->n_raw_bytes == 0 || + dstate.end_of_stream) + return LZFSE_STATUS_ERROR; + + // 
Here, block is not done and state is valid, so we need more space in dst. + return LZFSE_STATUS_DST_FULL; + } + + default: + return LZFSE_STATUS_ERROR; // invalid magic + + } // switch magic + + } // block loop + + return LZFSE_STATUS_OK; } diff --git a/lzfse/lzfse_encode.c b/lzfse/lzfse_encode.c index f0742ec..6f1fff1 100644 --- a/lzfse/lzfse_encode.c +++ b/lzfse/lzfse_encode.c @@ -1,163 +1,158 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// LZFSE encode API +/* LZFSE encode API + */ #include "lzfse.h" #include "lzfse_internal.h" -size_t lzfse_encode_scratch_size() { - size_t s1 = sizeof(lzfse_encoder_state); - size_t s2 = lzvn_encode_scratch_size(); - return (s1 > s2) ? s1 : s2; // max(lzfse,lzvn) +size_t lzfse_encode_scratch_size() +{ + size_t s1 = sizeof(lzfse_encoder_state); + size_t s2 = lzvn_encode_scratch_size(); + return (s1 > s2) ? 
s1 : s2; // max(lzfse,lzvn) } size_t lzfse_encode_buffer_with_scratch(uint8_t *__restrict dst_buffer, - size_t dst_size, const uint8_t *__restrict src_buffer, - size_t src_size, void *__restrict scratch_buffer) { - const size_t original_size = src_size; - - // If input is really really small, go directly to uncompressed buffer - // (because LZVN will refuse to encode it, and we will report a failure) - if (src_size < LZVN_ENCODE_MIN_SRC_SIZE) - goto try_uncompressed; - - // If input is too small, try encoding with LZVN - if (src_size < LZFSE_ENCODE_LZVN_THRESHOLD) { - // need header + end-of-stream marker - size_t extra_size = 4 + sizeof(lzvn_compressed_block_header); - if (dst_size <= extra_size) - goto try_uncompressed; // DST is really too small, give up - - size_t sz = lzvn_encode_buffer( - dst_buffer + sizeof(lzvn_compressed_block_header), - dst_size - extra_size, src_buffer, src_size, scratch_buffer); - if (sz == 0 || sz >= src_size) - goto try_uncompressed; // failed, or no compression, fall back to - // uncompressed block - - // If we could encode, setup header and end-of-stream marker (we left room - // for them, no need to test) - lzvn_compressed_block_header header; - header.magic = LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC; - header.n_raw_bytes = (uint32_t)src_size; - header.n_payload_bytes = (uint32_t)sz; - memcpy(dst_buffer, &header, sizeof(header)); - store4(dst_buffer + sizeof(lzvn_compressed_block_header) + sz, - LZFSE_ENDOFSTREAM_BLOCK_MAGIC); - - return sz + extra_size; - } - - // Try encoding with LZFSE - { - lzfse_encoder_state *state = scratch_buffer; - memset(state, 0x00, sizeof *state); - if (lzfse_encode_init(state) != LZFSE_STATUS_OK) - goto try_uncompressed; - state->dst = dst_buffer; - state->dst_begin = dst_buffer; - state->dst_end = &dst_buffer[dst_size]; - state->src = src_buffer; - state->src_encode_i = 0; - - if (src_size >= 0xffffffffU) { - // lzfse only uses 32 bits for offsets internally, so if the input - // buffer is really huge, we need to process it in smaller chunks. - // Note that we switch over to this path for sizes much smaller - // 2GB because it's actually faster to change algorithms well before - // it's necessary for correctness. - // The first chunk, we just process normally. - const lzfse_offset encoder_block_size = 262144; - state->src_end = encoder_block_size; - if (lzfse_encode_base(state) != LZFSE_STATUS_OK) - goto try_uncompressed; - src_size -= encoder_block_size; - while (src_size >= encoder_block_size) { - // All subsequent chunks require a translation to keep the offsets - // from getting too big. Note that we are always going from - // encoder_block_size up to 2*encoder_block_size so that the - // offsets remain positive (as opposed to resetting to zero and - // having negative offsets). - state->src_end = 2 * encoder_block_size; - if (lzfse_encode_base(state) != LZFSE_STATUS_OK) - goto try_uncompressed; - lzfse_encode_translate(state, encoder_block_size); - src_size -= encoder_block_size; - } - // Set the end for the final chunk. - state->src_end = encoder_block_size + (lzfse_offset)src_size; - } - // If the source buffer is small enough to use 32-bit offsets, we simply - // encode the whole thing in a single chunk. - else - state->src_end = (lzfse_offset)src_size; - // This is either the trailing chunk (if the source file is huge), or - // the whole source file. 
- if (lzfse_encode_base(state) != LZFSE_STATUS_OK) - goto try_uncompressed; - if (lzfse_encode_finish(state) != LZFSE_STATUS_OK) - goto try_uncompressed; - // No error occured, return compressed size. - return state->dst - dst_buffer; - } + size_t dst_size, + const uint8_t *__restrict src_buffer, + size_t src_size, + void *__restrict scratch_buffer) +{ + const size_t original_size = src_size; + + // If input is really really small, go directly to uncompressed buffer + // (because LZVN will refuse to encode it, and we will report a failure) + if (src_size < LZVN_ENCODE_MIN_SRC_SIZE) + goto try_uncompressed; + + // If input is too small, try encoding with LZVN + if (src_size < LZFSE_ENCODE_LZVN_THRESHOLD) { + // need header + end-of-stream marker + size_t extra_size = 4 + sizeof(lzvn_compressed_block_header); + if (dst_size <= extra_size) + goto try_uncompressed; // DST is really too small, give up + + size_t sz = lzvn_encode_buffer( + dst_buffer + sizeof(lzvn_compressed_block_header), + dst_size - extra_size, src_buffer, src_size, + scratch_buffer); + if (sz == 0 || sz >= src_size) + goto try_uncompressed; // failed, or no compression, fall back to + // uncompressed block + + // If we could encode, setup header and end-of-stream marker (we left room + // for them, no need to test) + lzvn_compressed_block_header header; + header.magic = LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC; + header.n_raw_bytes = (uint32_t)src_size; + header.n_payload_bytes = (uint32_t)sz; + memcpy(dst_buffer, &header, sizeof(header)); + store4(dst_buffer + sizeof(lzvn_compressed_block_header) + sz, + LZFSE_ENDOFSTREAM_BLOCK_MAGIC); + + return sz + extra_size; + } + + // Try encoding with LZFSE + { + lzfse_encoder_state *state = scratch_buffer; + memset(state, 0x00, sizeof *state); + if (lzfse_encode_init(state) != LZFSE_STATUS_OK) + goto try_uncompressed; + state->dst = dst_buffer; + state->dst_begin = dst_buffer; + state->dst_end = &dst_buffer[dst_size]; + state->src = src_buffer; + state->src_encode_i = 0; + + if (src_size >= 0xffffffffU) { + // lzfse only uses 32 bits for offsets internally, so if the input + // buffer is really huge, we need to process it in smaller chunks. + // Note that we switch over to this path for sizes much smaller + // 2GB because it's actually faster to change algorithms well before + // it's necessary for correctness. + // The first chunk, we just process normally. + const lzfse_offset encoder_block_size = 262144; + state->src_end = encoder_block_size; + if (lzfse_encode_base(state) != LZFSE_STATUS_OK) + goto try_uncompressed; + src_size -= encoder_block_size; + while (src_size >= encoder_block_size) { + // All subsequent chunks require a translation to keep the offsets + // from getting too big. Note that we are always going from + // encoder_block_size up to 2*encoder_block_size so that the + // offsets remain positive (as opposed to resetting to zero and + // having negative offsets). + state->src_end = 2 * encoder_block_size; + if (lzfse_encode_base(state) != LZFSE_STATUS_OK) + goto try_uncompressed; + lzfse_encode_translate(state, + encoder_block_size); + src_size -= encoder_block_size; + } + // Set the end for the final chunk. + state->src_end = + encoder_block_size + (lzfse_offset)src_size; + } + // If the source buffer is small enough to use 32-bit offsets, we simply + // encode the whole thing in a single chunk. + else + state->src_end = (lzfse_offset)src_size; + // This is either the trailing chunk (if the source file is huge), or + // the whole source file. 
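/*
 * Illustrative sketch, not part of the patch: a minimal caller of the public
 * lzfse.h API used above, with an explicit scratch buffer. Sizing dst_size as
 * src_size + 12 mirrors the uncompressed-fallback check later in this
 * function, so inputs below INT32_MAX cannot fail for lack of output space.
 * The helper name is hypothetical.
 */
#include <stdint.h>
#include <stdlib.h>
#include "lzfse.h"

static size_t compress_with_scratch(uint8_t *dst, size_t dst_size,
				    const uint8_t *src, size_t src_size)
{
	/* +1 in case the scratch size could be zero, as lzfse_encode_buffer
	 * itself does when it allocates internally. */
	void *scratch = malloc(lzfse_encode_scratch_size() + 1);
	size_t out_size;

	if (scratch == NULL)
		return 0;
	out_size = lzfse_encode_buffer(dst, dst_size, src, src_size, scratch);
	free(scratch);
	return out_size; /* 0 means the output did not fit into dst */
}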
+ if (lzfse_encode_base(state) != LZFSE_STATUS_OK) + goto try_uncompressed; + if (lzfse_encode_finish(state) != LZFSE_STATUS_OK) + goto try_uncompressed; + // No error occured, return compressed size. + return state->dst - dst_buffer; + } try_uncompressed: - // Compression failed for some reason. If we can fit the data into the - // output buffer uncompressed, go ahead and do that instead. - if (original_size + 12 <= dst_size && original_size < INT32_MAX) { - uncompressed_block_header header = {.magic = LZFSE_UNCOMPRESSED_BLOCK_MAGIC, - .n_raw_bytes = (uint32_t)src_size}; - uint8_t *dst_end = dst_buffer; - memcpy(dst_end, &header, sizeof header); - dst_end += sizeof header; - memcpy(dst_end, src_buffer, original_size); - dst_end += original_size; - store4(dst_end, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); - dst_end += 4; - return dst_end - dst_buffer; - } - - // Otherwise, there's nothing we can do, so return zero. - return 0; + // Compression failed for some reason. If we can fit the data into the + // output buffer uncompressed, go ahead and do that instead. + if (original_size + 12 <= dst_size && original_size < INT32_MAX) { + uncompressed_block_header header = { + .magic = LZFSE_UNCOMPRESSED_BLOCK_MAGIC, + .n_raw_bytes = (uint32_t)src_size + }; + uint8_t *dst_end = dst_buffer; + memcpy(dst_end, &header, sizeof header); + dst_end += sizeof header; + memcpy(dst_end, src_buffer, original_size); + dst_end += original_size; + store4(dst_end, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); + dst_end += 4; + return dst_end - dst_buffer; + } + + // Otherwise, there's nothing we can do, so return zero. + return 0; } size_t lzfse_encode_buffer(uint8_t *__restrict dst_buffer, size_t dst_size, - const uint8_t *__restrict src_buffer, - size_t src_size, void *__restrict scratch_buffer) { - int has_malloc = 0; - size_t ret = 0; - - // Deal with the possible NULL pointer - if (scratch_buffer == NULL) { - // +1 in case scratch size could be zero - scratch_buffer = malloc(lzfse_encode_scratch_size() + 1); - has_malloc = 1; - } - if (scratch_buffer == NULL) - return 0; - ret = lzfse_encode_buffer_with_scratch(dst_buffer, - dst_size, src_buffer, - src_size, scratch_buffer); - if (has_malloc) - free(scratch_buffer); - return ret; + const uint8_t *__restrict src_buffer, + size_t src_size, void *__restrict scratch_buffer) +{ + int has_malloc = 0; + size_t ret = 0; + + // Deal with the possible NULL pointer + if (scratch_buffer == NULL) { + // +1 in case scratch size could be zero + scratch_buffer = malloc(lzfse_encode_scratch_size() + 1); + has_malloc = 1; + } + if (scratch_buffer == NULL) + return 0; + ret = lzfse_encode_buffer_with_scratch(dst_buffer, dst_size, src_buffer, + src_size, scratch_buffer); + if (has_malloc) + free(scratch_buffer); + return ret; } diff --git a/lzfse/lzfse_encode_base.c b/lzfse/lzfse_encode_base.c index a813fba..e4a2508 100644 --- a/lzfse/lzfse_encode_base.c +++ b/lzfse/lzfse_encode_base.c @@ -1,40 +1,28 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// LZFSE encoder +/* LZFSE encoder + */ #include "lzfse_internal.h" #include "lzfse_encode_tables.h" /*! @abstract Get hash in range [0, LZFSE_ENCODE_HASH_VALUES-1] from 4 bytes in X. */ -static inline uint32_t hashX(uint32_t x) { - return (x * 2654435761U) >> - (32 - LZFSE_ENCODE_HASH_BITS); // Knuth multiplicative hash +static inline uint32_t hashX(uint32_t x) +{ + return (x * 2654435761U) >> + (32 - LZFSE_ENCODE_HASH_BITS); // Knuth multiplicative hash } /*! @abstract Return value with all 0 except nbits<=32 unsigned bits from V * at bit offset OFFSET. * V is assumed to fit on nbits bits. */ -static inline uint64_t setField(uint32_t v, int offset, int nbits) { - return ((uint64_t)v << (uint64_t)offset); +static inline uint64_t setField(uint32_t v, int offset, int nbits) +{ + return ((uint64_t)v << (uint64_t)offset); } /*! @abstract Encode all fields, except freq, from a @@ -42,67 +30,70 @@ static inline uint64_t setField(uint32_t v, int offset, int nbits) { * All but the header_size and freq fields of the output are modified. 
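// Illustrative sketch, not part of the patch: a hypothetical getField
// counterpart to setField above, as a reader of the packed v2 header fields
// would use it. Unlike setField, which relies on V already fitting in nbits
// bits, extraction has to mask after shifting. For example, n_literals is
// packed at bit offset 0 with 20 bits in lzfse_encode_v1_state below.
#include <stdint.h>

static inline uint32_t getField(uint64_t v, int offset, int nbits)
{
	// nbits <= 32, so the mask always fits in a 64-bit value.
	return (uint32_t)((v >> (uint64_t)offset) & ((1ULL << nbits) - 1));
}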
*/ static inline void lzfse_encode_v1_state(lzfse_compressed_block_header_v2 *out, - const lzfse_compressed_block_header_v1 *in) { - out->magic = LZFSE_COMPRESSEDV2_BLOCK_MAGIC; - out->n_raw_bytes = in->n_raw_bytes; - - // Literal state - out->packed_fields[0] = setField(in->n_literals, 0, 20) | - setField(in->n_literal_payload_bytes, 20, 20) | - setField(in->n_matches, 40, 20) | - setField(7 + in->literal_bits, 60, 3); - out->packed_fields[1] = setField(in->literal_state[0], 0, 10) | - setField(in->literal_state[1], 10, 10) | - setField(in->literal_state[2], 20, 10) | - setField(in->literal_state[3], 30, 10) | - setField(in->n_lmd_payload_bytes, 40, 20) | - setField(7 + in->lmd_bits, 60, 3); - out->packed_fields[2] = out->packed_fields[2] // header_size already stored in v[2] - | setField(in->l_state, 32, 10) | setField(in->m_state, 42, 10) | - setField(in->d_state, 52, 10); + const lzfse_compressed_block_header_v1 *in) +{ + out->magic = LZFSE_COMPRESSEDV2_BLOCK_MAGIC; + out->n_raw_bytes = in->n_raw_bytes; + + // Literal state + out->packed_fields[0] = setField(in->n_literals, 0, 20) | + setField(in->n_literal_payload_bytes, 20, 20) | + setField(in->n_matches, 40, 20) | + setField(7 + in->literal_bits, 60, 3); + out->packed_fields[1] = setField(in->literal_state[0], 0, 10) | + setField(in->literal_state[1], 10, 10) | + setField(in->literal_state[2], 20, 10) | + setField(in->literal_state[3], 30, 10) | + setField(in->n_lmd_payload_bytes, 40, 20) | + setField(7 + in->lmd_bits, 60, 3); + out->packed_fields[2] = + out->packed_fields[2] // header_size already stored in v[2] + | setField(in->l_state, 32, 10) | + setField(in->m_state, 42, 10) | setField(in->d_state, 52, 10); } /*! @abstract Encode an entry value in a freq table. Return bits, and sets * *nbits to the number of bits to serialize. */ -static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) { - // Fixed Huffman code, bits are read from LSB. - // Note that we rely on the position of the first '0' bit providing the number - // of bits. - switch (value) { - case 0: - *nbits = 2; - return 0; // 0.0 - case 1: - *nbits = 2; - return 2; // 1.0 - case 2: - *nbits = 3; - return 1; // 0.01 - case 3: - *nbits = 3; - return 5; // 1.01 - case 4: - *nbits = 5; - return 3; // 00.011 - case 5: - *nbits = 5; - return 11; // 01.011 - case 6: - *nbits = 5; - return 19; // 10.011 - case 7: - *nbits = 5; - return 27; // 11.011 - default: - break; - } - if (value < 24) { - *nbits = 8; // 4+4 - return 7 + ((value - 8) << 4); // xxxx.0111 - } - // 24..1047 - *nbits = 14; // 4+10 - return ((value - 24) << 4) + 15; // xxxxxxxxxx.1111 +static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) +{ + // Fixed Huffman code, bits are read from LSB. + // Note that we rely on the position of the first '0' bit providing the number + // of bits. + switch (value) { + case 0: + *nbits = 2; + return 0; // 0.0 + case 1: + *nbits = 2; + return 2; // 1.0 + case 2: + *nbits = 3; + return 1; // 0.01 + case 3: + *nbits = 3; + return 5; // 1.01 + case 4: + *nbits = 5; + return 3; // 00.011 + case 5: + *nbits = 5; + return 11; // 01.011 + case 6: + *nbits = 5; + return 19; // 10.011 + case 7: + *nbits = 5; + return 27; // 11.011 + default: + break; + } + if (value < 24) { + *nbits = 8; // 4+4 + return 7 + ((value - 8) << 4); // xxxx.0111 + } + // 24..1047 + *nbits = 14; // 4+10 + return ((value - 24) << 4) + 15; // xxxxxxxxxx.1111 } /*! 
@abstract Encode all tables from a lzfse_compressed_block_header_v1 @@ -111,412 +102,425 @@ static inline uint32_t lzfse_encode_v1_freq_value(int value, int *nbits) { * @return Size of the lzfse_compressed_block_header_v2 */ static inline size_t lzfse_encode_v1_freq_table(lzfse_compressed_block_header_v2 *out, - const lzfse_compressed_block_header_v1 *in) { - uint32_t accum = 0; - int accum_nbits = 0; - const uint16_t *src = &(in->l_freq[0]); // first value of first table (struct - // will not be modified, so this code - // will remain valid) - uint8_t *dst = &(out->freq[0]); - for (int i = 0; i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + - LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; - i++) { - // Encode one value to accum - int nbits = 0; - uint32_t bits = lzfse_encode_v1_freq_value(src[i], &nbits); - accum |= bits << accum_nbits; - accum_nbits += nbits; - - // Store bytes from accum to output buffer - while (accum_nbits >= 8) { - *dst = (uint8_t)(accum & 0xff); - accum >>= 8; - accum_nbits -= 8; - dst++; - } - } - // Store final byte if needed - if (accum_nbits > 0) { - *dst = (uint8_t)(accum & 0xff); - dst++; - } - - // Return final size of out - uint32_t header_size = (uint32_t)(dst - (uint8_t *)out); - out->packed_fields[0] = 0; - out->packed_fields[1] = 0; - out->packed_fields[2] = setField(header_size, 0, 32); - - return header_size; + const lzfse_compressed_block_header_v1 *in) +{ + uint32_t accum = 0; + int accum_nbits = 0; + const uint16_t *src = + &(in->l_freq[0]); // first value of first table (struct + // will not be modified, so this code + // will remain valid) + uint8_t *dst = &(out->freq[0]); + for (int i = 0; + i < LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + + LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS; + i++) { + // Encode one value to accum + int nbits = 0; + uint32_t bits = lzfse_encode_v1_freq_value(src[i], &nbits); + accum |= bits << accum_nbits; + accum_nbits += nbits; + + // Store bytes from accum to output buffer + while (accum_nbits >= 8) { + *dst = (uint8_t)(accum & 0xff); + accum >>= 8; + accum_nbits -= 8; + dst++; + } + } + // Store final byte if needed + if (accum_nbits > 0) { + *dst = (uint8_t)(accum & 0xff); + dst++; + } + + // Return final size of out + uint32_t header_size = (uint32_t)(dst - (uint8_t *)out); + out->packed_fields[0] = 0; + out->packed_fields[1] = 0; + out->packed_fields[2] = setField(header_size, 0, 32); + + return header_size; } -// We need to limit forward match length to make sure it won't split into a too -// large number of LMD. -// The limit itself is quite large, so it doesn't really impact compression -// ratio. -// The matches may still be expanded backwards by a few bytes, so the final -// length may be greater than this limit, which is OK. +/* We need to limit forward match length to make sure it won't split into a too + * large number of LMD. + * The limit itself is quite large, so it doesn't really impact compression + * ratio. + * The matches may still be expanded backwards by a few bytes, so the final + * length may be greater than this limit, which is OK. + */ #define LZFSE_ENCODE_MAX_MATCH_LENGTH (100 * LZFSE_ENCODE_MAX_M_VALUE) -// =============================================================== -// Encoder back end +/* =============================================================== + * Encoder back end + */ /*! @abstract Encode matches stored in STATE into a compressed/uncompressed block. * @return LZFSE_STATUS_OK on success. 
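// Illustrative sketch, not part of the patch: a decoder-side counterpart to
// lzfse_encode_v1_freq_value above. Scanning from the LSB, the position of
// the first '0' bit selects the code length (2, 3, 5, 8 or 14 bits), exactly
// as the encoder's comment describes. The function name is hypothetical; it
// is not the library's own table-driven decoder.
#include <stdint.h>

static inline int decode_v1_freq_value_sketch(uint32_t bits, int *nbits)
{
	if ((bits & 1) == 0) {
		*nbits = 2;
		return (bits >> 1) & 1;            // values 0..1
	}
	if ((bits & 2) == 0) {
		*nbits = 3;
		return 2 + ((bits >> 2) & 1);      // values 2..3
	}
	if ((bits & 4) == 0) {
		*nbits = 5;
		return 4 + ((bits >> 3) & 3);      // values 4..7
	}
	if ((bits & 8) == 0) {
		*nbits = 8;
		return 8 + ((bits >> 4) & 0xf);    // values 8..23
	}
	*nbits = 14;
	return 24 + ((bits >> 4) & 0x3ff);         // values 24..1047
}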
* @return LZFSE_STATUS_DST_FULL and restore initial state if output buffer is * full. */ -static int lzfse_encode_matches(lzfse_encoder_state *s) { - if (s->n_literals == 0 && s->n_matches == 0) - return LZFSE_STATUS_OK; // nothing to store, OK - - uint32_t l_occ[LZFSE_ENCODE_L_SYMBOLS]; - uint32_t m_occ[LZFSE_ENCODE_M_SYMBOLS]; - uint32_t d_occ[LZFSE_ENCODE_D_SYMBOLS]; - uint32_t literal_occ[LZFSE_ENCODE_LITERAL_SYMBOLS]; - fse_encoder_entry l_encoder[LZFSE_ENCODE_L_SYMBOLS]; - fse_encoder_entry m_encoder[LZFSE_ENCODE_M_SYMBOLS]; - fse_encoder_entry d_encoder[LZFSE_ENCODE_D_SYMBOLS]; - fse_encoder_entry literal_encoder[LZFSE_ENCODE_LITERAL_SYMBOLS]; - int ok = 1; - lzfse_compressed_block_header_v1 header1 = {0}; - lzfse_compressed_block_header_v2 *header2 = 0; - - // Keep initial state to be able to restore it if DST full - uint8_t *dst0 = s->dst; - uint32_t n_literals0 = s->n_literals; - - // Add 0x00 literals until n_literals multiple of 4, since we encode 4 - // interleaved literal streams. - while (s->n_literals & 3) { - uint32_t n = s->n_literals++; - s->literals[n] = 0; - } - - // Encode previous distance - uint32_t d_prev = 0; - for (uint32_t i = 0; i < s->n_matches; i++) { - uint32_t d = s->d_values[i]; - if (d == d_prev) - s->d_values[i] = 0; - else - d_prev = d; - } - - // Clear occurrence tables - memset(l_occ, 0, sizeof(l_occ)); - memset(m_occ, 0, sizeof(m_occ)); - memset(d_occ, 0, sizeof(d_occ)); - memset(literal_occ, 0, sizeof(literal_occ)); - - // Update occurrence tables in all 4 streams (L,M,D,literals) - uint32_t l_sum = 0; - uint32_t m_sum = 0; - for (uint32_t i = 0; i < s->n_matches; i++) { - uint32_t l = s->l_values[i]; - l_sum += l; - l_occ[l_base_from_value(l)]++; - } - for (uint32_t i = 0; i < s->n_matches; i++) { - uint32_t m = s->m_values[i]; - m_sum += m; - m_occ[m_base_from_value(m)]++; - } - for (uint32_t i = 0; i < s->n_matches; i++) - d_occ[d_base_from_value(s->d_values[i])]++; - for (uint32_t i = 0; i < s->n_literals; i++) - literal_occ[s->literals[i]]++; - - // Make sure we have enough room for a _full_ V2 header - if (s->dst + sizeof(lzfse_compressed_block_header_v2) > s->dst_end) { - ok = 0; - goto END; - } - header2 = (lzfse_compressed_block_header_v2 *)(s->dst); - - // Setup header V1 - header1.magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; - header1.n_raw_bytes = m_sum + l_sum; - header1.n_matches = s->n_matches; - header1.n_literals = s->n_literals; - - // Normalize occurrence tables to freq tables - fse_normalize_freq(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, l_occ, - header1.l_freq); - fse_normalize_freq(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, m_occ, - header1.m_freq); - fse_normalize_freq(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, d_occ, - header1.d_freq); - fse_normalize_freq(LZFSE_ENCODE_LITERAL_STATES, LZFSE_ENCODE_LITERAL_SYMBOLS, - literal_occ, header1.literal_freq); - - // Compress freq tables to V2 header, and get actual size of V2 header - s->dst += lzfse_encode_v1_freq_table(header2, &header1); - - // Initialize encoder tables from freq tables - fse_init_encoder_table(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, - header1.l_freq, l_encoder); - fse_init_encoder_table(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, - header1.m_freq, m_encoder); - fse_init_encoder_table(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, - header1.d_freq, d_encoder); - fse_init_encoder_table(LZFSE_ENCODE_LITERAL_STATES, - LZFSE_ENCODE_LITERAL_SYMBOLS, header1.literal_freq, - literal_encoder); - - // Encode literals - { - fse_out_stream out; - 
fse_out_init(&out); - fse_state state0, state1, state2, state3; - state0 = state1 = state2 = state3 = 0; - - uint8_t *buf = s->dst; - uint32_t i = s->n_literals; // I multiple of 4 - // We encode starting from the last literal so we can decode starting from - // the first - while (i > 0) { - if (buf + 16 > s->dst_end) { - ok = 0; - goto END; - } // out full - i -= 4; - fse_encode(&state3, literal_encoder, &out, s->literals[i + 3]); // 10b - fse_encode(&state2, literal_encoder, &out, s->literals[i + 2]); // 10b +static int lzfse_encode_matches(lzfse_encoder_state *s) +{ + if (s->n_literals == 0 && s->n_matches == 0) + return LZFSE_STATUS_OK; // nothing to store, OK + + uint32_t l_occ[LZFSE_ENCODE_L_SYMBOLS]; + uint32_t m_occ[LZFSE_ENCODE_M_SYMBOLS]; + uint32_t d_occ[LZFSE_ENCODE_D_SYMBOLS]; + uint32_t literal_occ[LZFSE_ENCODE_LITERAL_SYMBOLS]; + fse_encoder_entry l_encoder[LZFSE_ENCODE_L_SYMBOLS]; + fse_encoder_entry m_encoder[LZFSE_ENCODE_M_SYMBOLS]; + fse_encoder_entry d_encoder[LZFSE_ENCODE_D_SYMBOLS]; + fse_encoder_entry literal_encoder[LZFSE_ENCODE_LITERAL_SYMBOLS]; + int ok = 1; + lzfse_compressed_block_header_v1 header1 = { 0 }; + lzfse_compressed_block_header_v2 *header2 = 0; + + // Keep initial state to be able to restore it if DST full + uint8_t *dst0 = s->dst; + uint32_t n_literals0 = s->n_literals; + + // Add 0x00 literals until n_literals multiple of 4, since we encode 4 + // interleaved literal streams. + while (s->n_literals & 3) { + uint32_t n = s->n_literals++; + s->literals[n] = 0; + } + + // Encode previous distance + uint32_t d_prev = 0; + for (uint32_t i = 0; i < s->n_matches; i++) { + uint32_t d = s->d_values[i]; + if (d == d_prev) + s->d_values[i] = 0; + else + d_prev = d; + } + + // Clear occurrence tables + memset(l_occ, 0, sizeof(l_occ)); + memset(m_occ, 0, sizeof(m_occ)); + memset(d_occ, 0, sizeof(d_occ)); + memset(literal_occ, 0, sizeof(literal_occ)); + + // Update occurrence tables in all 4 streams (L,M,D,literals) + uint32_t l_sum = 0; + uint32_t m_sum = 0; + for (uint32_t i = 0; i < s->n_matches; i++) { + uint32_t l = s->l_values[i]; + l_sum += l; + l_occ[l_base_from_value(l)]++; + } + for (uint32_t i = 0; i < s->n_matches; i++) { + uint32_t m = s->m_values[i]; + m_sum += m; + m_occ[m_base_from_value(m)]++; + } + for (uint32_t i = 0; i < s->n_matches; i++) + d_occ[d_base_from_value(s->d_values[i])]++; + for (uint32_t i = 0; i < s->n_literals; i++) + literal_occ[s->literals[i]]++; + + // Make sure we have enough room for a _full_ V2 header + if (s->dst + sizeof(lzfse_compressed_block_header_v2) > s->dst_end) { + ok = 0; + goto END; + } + header2 = (lzfse_compressed_block_header_v2 *)(s->dst); + + // Setup header V1 + header1.magic = LZFSE_COMPRESSEDV1_BLOCK_MAGIC; + header1.n_raw_bytes = m_sum + l_sum; + header1.n_matches = s->n_matches; + header1.n_literals = s->n_literals; + + // Normalize occurrence tables to freq tables + fse_normalize_freq(LZFSE_ENCODE_L_STATES, LZFSE_ENCODE_L_SYMBOLS, l_occ, + header1.l_freq); + fse_normalize_freq(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, m_occ, + header1.m_freq); + fse_normalize_freq(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, d_occ, + header1.d_freq); + fse_normalize_freq(LZFSE_ENCODE_LITERAL_STATES, + LZFSE_ENCODE_LITERAL_SYMBOLS, literal_occ, + header1.literal_freq); + + // Compress freq tables to V2 header, and get actual size of V2 header + s->dst += lzfse_encode_v1_freq_table(header2, &header1); + + // Initialize encoder tables from freq tables + fse_init_encoder_table(LZFSE_ENCODE_L_STATES, 
LZFSE_ENCODE_L_SYMBOLS, + header1.l_freq, l_encoder); + fse_init_encoder_table(LZFSE_ENCODE_M_STATES, LZFSE_ENCODE_M_SYMBOLS, + header1.m_freq, m_encoder); + fse_init_encoder_table(LZFSE_ENCODE_D_STATES, LZFSE_ENCODE_D_SYMBOLS, + header1.d_freq, d_encoder); + fse_init_encoder_table(LZFSE_ENCODE_LITERAL_STATES, + LZFSE_ENCODE_LITERAL_SYMBOLS, + header1.literal_freq, literal_encoder); + + // Encode literals + { + fse_out_stream out; + fse_out_init(&out); + fse_state state0, state1, state2, state3; + state0 = state1 = state2 = state3 = 0; + + uint8_t *buf = s->dst; + uint32_t i = s->n_literals; // I multiple of 4 + // We encode starting from the last literal so we can decode starting from + // the first + while (i > 0) { + if (buf + 16 > s->dst_end) { + ok = 0; + goto END; + } // out full + i -= 4; + fse_encode(&state3, literal_encoder, &out, + s->literals[i + 3]); // 10b + fse_encode(&state2, literal_encoder, &out, + s->literals[i + 2]); // 10b #if !FSE_IOSTREAM_64 - fse_out_flush(&out, &buf); + fse_out_flush(&out, &buf); #endif - fse_encode(&state1, literal_encoder, &out, s->literals[i + 1]); // 10b - fse_encode(&state0, literal_encoder, &out, s->literals[i + 0]); // 10b - fse_out_flush(&out, &buf); - } - fse_out_finish(&out, &buf); - - // Update header with final encoder state - header1.literal_bits = out.accum_nbits; // [-7, 0] - header1.n_literal_payload_bytes = (uint32_t)(buf - s->dst); - header1.literal_state[0] = state0; - header1.literal_state[1] = state1; - header1.literal_state[2] = state2; - header1.literal_state[3] = state3; - - // Update state - s->dst = buf; - - } // literals - - // Encode L,M,D - { - fse_out_stream out; - fse_out_init(&out); - fse_state l_state, m_state, d_state; - l_state = m_state = d_state = 0; - - uint8_t *buf = s->dst; - uint32_t i = s->n_matches; - - // Add 8 padding bytes to the L,M,D payload - if (buf + 8 > s->dst_end) { - ok = 0; - goto END; - } // out full - store8(buf, 0); - buf += 8; - - // We encode starting from the last match so we can decode starting from the - // first - while (i > 0) { - if (buf + 16 > s->dst_end) { - ok = 0; - goto END; - } // out full - i -= 1; - - // D requires 23b max - int32_t d_value = s->d_values[i]; - uint8_t d_symbol = d_base_from_value(d_value); - int32_t d_nbits = d_extra_bits[d_symbol]; - int32_t d_bits = d_value - d_base_value[d_symbol]; - fse_out_push(&out, d_nbits, d_bits); - fse_encode(&d_state, d_encoder, &out, d_symbol); + fse_encode(&state1, literal_encoder, &out, + s->literals[i + 1]); // 10b + fse_encode(&state0, literal_encoder, &out, + s->literals[i + 0]); // 10b + fse_out_flush(&out, &buf); + } + fse_out_finish(&out, &buf); + + // Update header with final encoder state + header1.literal_bits = out.accum_nbits; // [-7, 0] + header1.n_literal_payload_bytes = (uint32_t)(buf - s->dst); + header1.literal_state[0] = state0; + header1.literal_state[1] = state1; + header1.literal_state[2] = state2; + header1.literal_state[3] = state3; + + // Update state + s->dst = buf; + + } // literals + + // Encode L,M,D + { + fse_out_stream out; + fse_out_init(&out); + fse_state l_state, m_state, d_state; + l_state = m_state = d_state = 0; + + uint8_t *buf = s->dst; + uint32_t i = s->n_matches; + + // Add 8 padding bytes to the L,M,D payload + if (buf + 8 > s->dst_end) { + ok = 0; + goto END; + } // out full + store8(buf, 0); + buf += 8; + + // We encode starting from the last match so we can decode starting from the + // first + while (i > 0) { + if (buf + 16 > s->dst_end) { + ok = 0; + goto END; + } // out full + i -= 1; 
+ + // D requires 23b max + int32_t d_value = s->d_values[i]; + uint8_t d_symbol = d_base_from_value(d_value); + int32_t d_nbits = d_extra_bits[d_symbol]; + int32_t d_bits = d_value - d_base_value[d_symbol]; + fse_out_push(&out, d_nbits, d_bits); + fse_encode(&d_state, d_encoder, &out, d_symbol); #if !FSE_IOSTREAM_64 - fse_out_flush(&out, &buf); + fse_out_flush(&out, &buf); #endif - // M requires 17b max - int32_t m_value = s->m_values[i]; - uint8_t m_symbol = m_base_from_value(m_value); - int32_t m_nbits = m_extra_bits[m_symbol]; - int32_t m_bits = m_value - m_base_value[m_symbol]; - fse_out_push(&out, m_nbits, m_bits); - fse_encode(&m_state, m_encoder, &out, m_symbol); + // M requires 17b max + int32_t m_value = s->m_values[i]; + uint8_t m_symbol = m_base_from_value(m_value); + int32_t m_nbits = m_extra_bits[m_symbol]; + int32_t m_bits = m_value - m_base_value[m_symbol]; + fse_out_push(&out, m_nbits, m_bits); + fse_encode(&m_state, m_encoder, &out, m_symbol); #if !FSE_IOSTREAM_64 - fse_out_flush(&out, &buf); + fse_out_flush(&out, &buf); #endif - // L requires 14b max - int32_t l_value = s->l_values[i]; - uint8_t l_symbol = l_base_from_value(l_value); - int32_t l_nbits = l_extra_bits[l_symbol]; - int32_t l_bits = l_value - l_base_value[l_symbol]; - fse_out_push(&out, l_nbits, l_bits); - fse_encode(&l_state, l_encoder, &out, l_symbol); - fse_out_flush(&out, &buf); - } - fse_out_finish(&out, &buf); - - // Update header with final encoder state - header1.n_lmd_payload_bytes = (uint32_t)(buf - s->dst); - header1.lmd_bits = out.accum_nbits; // [-7, 0] - header1.l_state = l_state; - header1.m_state = m_state; - header1.d_state = d_state; - - // Update state - s->dst = buf; - - } // L,M,D - - // Final state update, here we had enough space in DST, and are not going to - // revert state - s->n_literals = 0; - s->n_matches = 0; - - // Final payload size - header1.n_payload_bytes = - header1.n_literal_payload_bytes + header1.n_lmd_payload_bytes; - - // Encode state info in V2 header (we previously encoded the tables, now we - // set the other fields) - lzfse_encode_v1_state(header2, &header1); + // L requires 14b max + int32_t l_value = s->l_values[i]; + uint8_t l_symbol = l_base_from_value(l_value); + int32_t l_nbits = l_extra_bits[l_symbol]; + int32_t l_bits = l_value - l_base_value[l_symbol]; + fse_out_push(&out, l_nbits, l_bits); + fse_encode(&l_state, l_encoder, &out, l_symbol); + fse_out_flush(&out, &buf); + } + fse_out_finish(&out, &buf); + + // Update header with final encoder state + header1.n_lmd_payload_bytes = (uint32_t)(buf - s->dst); + header1.lmd_bits = out.accum_nbits; // [-7, 0] + header1.l_state = l_state; + header1.m_state = m_state; + header1.d_state = d_state; + + // Update state + s->dst = buf; + + } // L,M,D + + // Final state update, here we had enough space in DST, and are not going to + // revert state + s->n_literals = 0; + s->n_matches = 0; + + // Final payload size + header1.n_payload_bytes = + header1.n_literal_payload_bytes + header1.n_lmd_payload_bytes; + + // Encode state info in V2 header (we previously encoded the tables, now we + // set the other fields) + lzfse_encode_v1_state(header2, &header1); END: - if (!ok) { - // Revert state, DST was full + if (!ok) { + // Revert state, DST was full - // Revert the d_prev encoding - uint32_t d_prev = 0; - for (uint32_t i = 0; i < s->n_matches; i++) { - uint32_t d = s->d_values[i]; - if (d == 0) - s->d_values[i] = d_prev; - else - d_prev = d; - } + // Revert the d_prev encoding + uint32_t d_prev = 0; + for (uint32_t i = 0; 
i < s->n_matches; i++) { + uint32_t d = s->d_values[i]; + if (d == 0) + s->d_values[i] = d_prev; + else + d_prev = d; + } - // Revert literal count - s->n_literals = n_literals0; + // Revert literal count + s->n_literals = n_literals0; - // Revert DST - s->dst = dst0; + // Revert DST + s->dst = dst0; - return LZFSE_STATUS_DST_FULL; // DST full - } + return LZFSE_STATUS_DST_FULL; // DST full + } - return LZFSE_STATUS_OK; + return LZFSE_STATUS_OK; } /*! @abstract Push a L,M,D match into the STATE. * @return LZFSE_STATUS_OK if OK. * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of * the buffers is full. In that case the state is not modified. */ -static inline int lzfse_push_lmd(lzfse_encoder_state *s, uint32_t L, - uint32_t M, uint32_t D) { - // Check if we have enough space to push the match (we add some margin to copy - // literals faster here, and round final count later) - if (s->n_matches + 1 + 8 > LZFSE_MATCHES_PER_BLOCK) - return LZFSE_STATUS_DST_FULL; // state full - if (s->n_literals + L + 16 > LZFSE_LITERALS_PER_BLOCK) - return LZFSE_STATUS_DST_FULL; // state full - - // Store match - uint32_t n = s->n_matches++; - s->l_values[n] = L; - s->m_values[n] = M; - s->d_values[n] = D; - - // Store literals - uint8_t *dst = s->literals + s->n_literals; - const uint8_t *src = s->src + s->src_literal; - uint8_t *dst_end = dst + L; - if (s->src_literal + L + 16 > s->src_end) { - // Careful at the end of SRC, we can't read 16 bytes - if (L > 0) - memcpy(dst, src, L); - } else { - copy16(dst, src); - dst += 16; - src += 16; - while (dst < dst_end) { - copy16(dst, src); - dst += 16; - src += 16; - } - } - s->n_literals += L; - - // Update state - s->src_literal += L + M; - - return LZFSE_STATUS_OK; +static inline int lzfse_push_lmd(lzfse_encoder_state *s, uint32_t L, uint32_t M, + uint32_t D) +{ + // Check if we have enough space to push the match (we add some margin to copy + // literals faster here, and round final count later) + if (s->n_matches + 1 + 8 > LZFSE_MATCHES_PER_BLOCK) + return LZFSE_STATUS_DST_FULL; // state full + if (s->n_literals + L + 16 > LZFSE_LITERALS_PER_BLOCK) + return LZFSE_STATUS_DST_FULL; // state full + + // Store match + uint32_t n = s->n_matches++; + s->l_values[n] = L; + s->m_values[n] = M; + s->d_values[n] = D; + + // Store literals + uint8_t *dst = s->literals + s->n_literals; + const uint8_t *src = s->src + s->src_literal; + uint8_t *dst_end = dst + L; + if (s->src_literal + L + 16 > s->src_end) { + // Careful at the end of SRC, we can't read 16 bytes + if (L > 0) + memcpy(dst, src, L); + } else { + copy16(dst, src); + dst += 16; + src += 16; + while (dst < dst_end) { + copy16(dst, src); + dst += 16; + src += 16; + } + } + s->n_literals += L; + + // Update state + s->src_literal += L + M; + + return LZFSE_STATUS_OK; } /*! @abstract Split MATCH into one or more L,M,D parts, and push to STATE. * @return LZFSE_STATUS_OK if OK. * @return LZFSE_STATUS_DST_FULL if the match can't be pushed, meaning one of the * buffers is full. In that case the state is not modified. 
*/ -static int lzfse_push_match(lzfse_encoder_state *s, const lzfse_match *match) { - // Save the initial n_matches, n_literals, src_literal - uint32_t n_matches0 = s->n_matches; - uint32_t n_literals0 = s->n_literals; - lzfse_offset src_literals0 = s->src_literal; - - // L,M,D - uint32_t L = (uint32_t)(match->pos - s->src_literal); // literal count - uint32_t M = match->length; // match length - uint32_t D = (uint32_t)(match->pos - match->ref); // match distance - int ok = 1; - - // Split L if too large - while (L > LZFSE_ENCODE_MAX_L_VALUE) { - if (lzfse_push_lmd(s, LZFSE_ENCODE_MAX_L_VALUE, 0, 1) != 0) { - ok = 0; - goto END; - } // take D=1 because most frequent, but not actually used - L -= LZFSE_ENCODE_MAX_L_VALUE; - } - - // Split if M too large - while (M > LZFSE_ENCODE_MAX_M_VALUE) { - if (lzfse_push_lmd(s, L, LZFSE_ENCODE_MAX_M_VALUE, D) != 0) { - ok = 0; - goto END; - } - L = 0; - M -= LZFSE_ENCODE_MAX_M_VALUE; - } - - // L,M in range - if (L > 0 || M > 0) { - if (lzfse_push_lmd(s, L, M, D) != 0) { - ok = 0; - goto END; - } - L = M = 0; - (void)L; - (void)M; // dead stores - } +static int lzfse_push_match(lzfse_encoder_state *s, const lzfse_match *match) +{ + // Save the initial n_matches, n_literals, src_literal + uint32_t n_matches0 = s->n_matches; + uint32_t n_literals0 = s->n_literals; + lzfse_offset src_literals0 = s->src_literal; + + // L,M,D + uint32_t L = (uint32_t)(match->pos - s->src_literal); // literal count + uint32_t M = match->length; // match length + uint32_t D = (uint32_t)(match->pos - match->ref); // match distance + int ok = 1; + + // Split L if too large + while (L > LZFSE_ENCODE_MAX_L_VALUE) { + if (lzfse_push_lmd(s, LZFSE_ENCODE_MAX_L_VALUE, 0, 1) != 0) { + ok = 0; + goto END; + } // take D=1 because most frequent, but not actually used + L -= LZFSE_ENCODE_MAX_L_VALUE; + } + + // Split if M too large + while (M > LZFSE_ENCODE_MAX_M_VALUE) { + if (lzfse_push_lmd(s, L, LZFSE_ENCODE_MAX_M_VALUE, D) != 0) { + ok = 0; + goto END; + } + L = 0; + M -= LZFSE_ENCODE_MAX_M_VALUE; + } + + // L,M in range + if (L > 0 || M > 0) { + if (lzfse_push_lmd(s, L, M, D) != 0) { + ok = 0; + goto END; + } + L = M = 0; + (void)L; + (void)M; // dead stores + } END: - if (!ok) { - // Revert state - s->n_matches = n_matches0; - s->n_literals = n_literals0; - s->src_literal = src_literals0; + if (!ok) { + // Revert state + s->n_matches = n_matches0; + s->n_literals = n_literals0; + s->src_literal = src_literals0; - return LZFSE_STATUS_DST_FULL; // state tables full - } + return LZFSE_STATUS_DST_FULL; // state tables full + } - return LZFSE_STATUS_OK; // OK + return LZFSE_STATUS_OK; // OK } /*! @abstract Backend: add MATCH to state S. Encode block if necessary, when @@ -524,18 +528,18 @@ static int lzfse_push_match(lzfse_encoder_state *s, const lzfse_match *match) { * @return LZFSE_STATUS_OK if OK. * @return LZFSE_STATUS_DST_FULL if the match can't be added, meaning one of the * buffers is full. In that case the state is not modified. 
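// Illustrative sketch, not part of the patch: the splitting policy of
// lzfse_push_match above, isolated from the encoder state. Oversized literal
// runs go out as (max_l, 0, 1) records (D = 1 because it is the most frequent
// distance, and it is never used for a zero-length match); an oversized match
// keeps its distance D across chunks. The emit callback and the max_l/max_m
// parameters are hypothetical stand-ins for lzfse_push_lmd and the
// LZFSE_ENCODE_MAX_L_VALUE / LZFSE_ENCODE_MAX_M_VALUE constants.
#include <stdint.h>

typedef void (*emit_lmd_fn)(uint32_t L, uint32_t M, uint32_t D, void *ctx);

static void split_lmd(uint32_t L, uint32_t M, uint32_t D, uint32_t max_l,
		      uint32_t max_m, emit_lmd_fn emit, void *ctx)
{
	// Flush full literal-only records until L fits.
	while (L > max_l) {
		emit(max_l, 0, 1, ctx);
		L -= max_l;
	}
	// Flush full match records; only the first one carries literals.
	while (M > max_m) {
		emit(L, max_m, D, ctx);
		L = 0;
		M -= max_m;
	}
	// The remainder fits in a single L,M,D record.
	if (L > 0 || M > 0)
		emit(L, M, D, ctx);
}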
*/ -static int lzfse_backend_match(lzfse_encoder_state *s, - const lzfse_match *match) { - // Try to push the match in state - if (lzfse_push_match(s, match) == LZFSE_STATUS_OK) - return LZFSE_STATUS_OK; // OK, match added to state - - // Here state tables are full, try to emit block - if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) - return LZFSE_STATUS_DST_FULL; // DST full, match not added - - // Here block has been emitted, re-try to push the match in state - return lzfse_push_match(s, match); +static int lzfse_backend_match(lzfse_encoder_state *s, const lzfse_match *match) +{ + // Try to push the match in state + if (lzfse_push_match(s, match) == LZFSE_STATUS_OK) + return LZFSE_STATUS_OK; // OK, match added to state + + // Here state tables are full, try to emit block + if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) + return LZFSE_STATUS_DST_FULL; // DST full, match not added + + // Here block has been emitted, re-try to push the match in state + return lzfse_push_match(s, match); } /*! @abstract Backend: add L literals to state S. Encode block if necessary, @@ -543,14 +547,15 @@ static int lzfse_backend_match(lzfse_encoder_state *s, * @return LZFSE_STATUS_OK if OK. * @return LZFSE_STATUS_DST_FULL if the literals can't be added, meaning one of * the buffers is full. In that case the state is not modified. */ -static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) { - // Create a fake match with M=0, D=1 - lzfse_match match; - lzfse_offset pos = s->src_literal + L; - match.pos = pos; - match.ref = match.pos - 1; - match.length = 0; - return lzfse_backend_match(s, &match); +static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) +{ + // Create a fake match with M=0, D=1 + lzfse_match match; + lzfse_offset pos = s->src_literal + L; + match.pos = pos; + match.ref = match.pos - 1; + match.length = 0; + return lzfse_backend_match(s, &match); } /*! @abstract Backend: flush final block, and emit end of stream @@ -558,23 +563,25 @@ static int lzfse_backend_literals(lzfse_encoder_state *s, lzfse_offset L) { * @return LZFSE_STATUS_DST_FULL if either the final block, or the end-of-stream * can't be added, meaning one of the buffers is full. If the block was emitted, * the state is updated to reflect this. Otherwise, it is left unchanged. */ -static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) { - // Final match triggers write, otherwise emit blocks when we have enough - // matches stored - if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) - return LZFSE_STATUS_DST_FULL; // DST full - - // Emit end-of-stream block - if (s->dst + 4 > s->dst_end) - return LZFSE_STATUS_DST_FULL; // DST full - store4(s->dst, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); - s->dst += 4; - - return LZFSE_STATUS_OK; // OK +static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) +{ + // Final match triggers write, otherwise emit blocks when we have enough + // matches stored + if (lzfse_encode_matches(s) != LZFSE_STATUS_OK) + return LZFSE_STATUS_DST_FULL; // DST full + + // Emit end-of-stream block + if (s->dst + 4 > s->dst_end) + return LZFSE_STATUS_DST_FULL; // DST full + store4(s->dst, LZFSE_ENDOFSTREAM_BLOCK_MAGIC); + s->dst += 4; + + return LZFSE_STATUS_OK; // OK } -// =============================================================== -// Encoder state management +/* =============================================================== + * Encoder state management + */ /*! 
@abstract Initialize state: * @code @@ -584,243 +591,262 @@ static int lzfse_backend_end_of_stream(lzfse_encoder_state *s) { * - d_prev to 0. @endcode * @return LZFSE_STATUS_OK */ -int lzfse_encode_init(lzfse_encoder_state *s) { - const lzfse_match NO_MATCH = {0}; - lzfse_history_set line; - for (int i = 0; i < LZFSE_ENCODE_HASH_WIDTH; i++) { - line.pos[i] = -4 * LZFSE_ENCODE_MAX_D_VALUE; // invalid pos - line.value[i] = 0; - } - // Fill table - for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) - s->history_table[i] = line; - s->pending = NO_MATCH; - s->src_literal = 0; - - return LZFSE_STATUS_OK; // OK +int lzfse_encode_init(lzfse_encoder_state *s) +{ + const lzfse_match NO_MATCH = { 0 }; + lzfse_history_set line; + for (int i = 0; i < LZFSE_ENCODE_HASH_WIDTH; i++) { + line.pos[i] = -4 * LZFSE_ENCODE_MAX_D_VALUE; // invalid pos + line.value[i] = 0; + } + // Fill table + for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) + s->history_table[i] = line; + s->pending = NO_MATCH; + s->src_literal = 0; + + return LZFSE_STATUS_OK; // OK } /*! @abstract Translate state \p src forward by \p delta > 0. * Offsets in \p src are updated backwards to point to the same positions. * @return LZFSE_STATUS_OK */ -int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta) { - if (delta == 0) - return LZFSE_STATUS_OK; // OK - - // SRC - s->src += delta; - - // Offsets in SRC - s->src_end -= delta; - s->src_encode_i -= delta; - s->src_encode_end -= delta; - s->src_literal -= delta; - - // Pending match - s->pending.pos -= delta; - s->pending.ref -= delta; - - // history_table positions, translated, and clamped to invalid pos - int32_t invalidPos = -4 * LZFSE_ENCODE_MAX_D_VALUE; - for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) { - int32_t *p = &(s->history_table[i].pos[0]); - for (int j = 0; j < LZFSE_ENCODE_HASH_WIDTH; j++) { - lzfse_offset newPos = p[j] - delta; // translate - p[j] = (int32_t)((newPos < invalidPos) ? invalidPos : newPos); // clamp - } - } - - return LZFSE_STATUS_OK; // OK +int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta) +{ + if (delta == 0) + return LZFSE_STATUS_OK; // OK + + // SRC + s->src += delta; + + // Offsets in SRC + s->src_end -= delta; + s->src_encode_i -= delta; + s->src_encode_end -= delta; + s->src_literal -= delta; + + // Pending match + s->pending.pos -= delta; + s->pending.ref -= delta; + + // history_table positions, translated, and clamped to invalid pos + int32_t invalidPos = -4 * LZFSE_ENCODE_MAX_D_VALUE; + for (int i = 0; i < LZFSE_ENCODE_HASH_VALUES; i++) { + int32_t *p = &(s->history_table[i].pos[0]); + for (int j = 0; j < LZFSE_ENCODE_HASH_WIDTH; j++) { + lzfse_offset newPos = p[j] - delta; // translate + p[j] = (int32_t)((newPos < invalidPos) ? 
+ invalidPos : + newPos); // clamp + } + } + + return LZFSE_STATUS_OK; // OK } -// =============================================================== -// Encoder front end - -int lzfse_encode_base(lzfse_encoder_state *s) { - lzfse_history_set *history_table = s->history_table; - lzfse_history_set *hashLine = 0; - lzfse_history_set newH; - const lzfse_match NO_MATCH = {0}; - int ok = 1; - - memset(&newH, 0x00, sizeof(newH)); - - // 8 byte padding at end of buffer - s->src_encode_end = s->src_end - 8; - for (; s->src_encode_i < s->src_encode_end; s->src_encode_i++) { - lzfse_offset pos = s->src_encode_i; // pos >= 0 - - // Load 4 byte value and get hash line - uint32_t x = load4(s->src + pos); - hashLine = history_table + hashX(x); - lzfse_history_set h = *hashLine; - - // Prepare next hash line (component 0 is the most recent) to prepare new - // entries (stored later) - { - newH.pos[0] = (int32_t)pos; - for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) - newH.pos[k + 1] = h.pos[k]; - newH.value[0] = x; - for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) - newH.value[k + 1] = h.value[k]; - } - - // Do not look for a match if we are still covered by a previous match - if (pos < s->src_literal) - goto END_POS; - - // Search best incoming match - lzfse_match incoming = {.pos = pos, .ref = 0, .length = 0}; - - // Check for matches. We consider matches of length >= 4 only. - for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH; k++) { - uint32_t d = h.value[k] ^ x; - if (d) - continue; // no 4 byte match - int32_t ref = h.pos[k]; - if (ref + LZFSE_ENCODE_MAX_D_VALUE < pos) - continue; // too far - - const uint8_t *src_ref = s->src + ref; - const uint8_t *src_pos = s->src + pos; - uint32_t length = 4; - uint32_t maxLength = - (uint32_t)(s->src_end - pos - 8); // ensure we don't hit the end of SRC - while (length < maxLength) { - uint64_t d = load8(src_ref + length) ^ load8(src_pos + length); - if (d == 0) { - length += 8; - continue; - } - - length += - (__builtin_ctzll(d) >> 3); // ctzll must be called only with D != 0 - break; - } - if (length > incoming.length) { - incoming.length = length; - incoming.ref = ref; - } // keep if longer - } - - // No incoming match? - if (incoming.length == 0) { - // We may still want to emit some literals here, to not lag too far behind - // the current search point, and avoid - // ending up with a literal block not fitting in the state. - lzfse_offset n_literals = pos - s->src_literal; - // The threshold here should be larger than a couple of MAX_L_VALUE, and - // much smaller than LITERALS_PER_BLOCK - if (n_literals > 8 * LZFSE_ENCODE_MAX_L_VALUE) { - // Here, we need to consume some literals. 
Emit pending match if there - // is one - if (s->pending.length > 0) { - if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { - ok = 0; - goto END; - } - s->pending = NO_MATCH; - } else { - // No pending match, emit a full LZFSE_ENCODE_MAX_L_VALUE block of - // literals - if (lzfse_backend_literals(s, LZFSE_ENCODE_MAX_L_VALUE) != - LZFSE_STATUS_OK) { - ok = 0; - goto END; - } - } - } - goto END_POS; // no incoming match - } - - // Limit match length (it may still be expanded backwards, but this is - // bounded by the limit on literals we tested before) - if (incoming.length > LZFSE_ENCODE_MAX_MATCH_LENGTH) { - incoming.length = LZFSE_ENCODE_MAX_MATCH_LENGTH; - } - - // Expand backwards (since this is expensive, we do this for the best match - // only) - while (incoming.pos > s->src_literal && incoming.ref > 0 && - s->src[incoming.ref - 1] == s->src[incoming.pos - 1]) { - incoming.pos--; - incoming.ref--; - } - incoming.length += pos - incoming.pos; // update length after expansion - - // Match filtering heuristic (from LZVN). INCOMING is always defined here. - - // Incoming is 'good', emit incoming - if (incoming.length >= LZFSE_ENCODE_GOOD_MATCH) { - if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) { - ok = 0; - goto END; - } - s->pending = NO_MATCH; - goto END_POS; - } - - // No pending, keep incoming - if (s->pending.length == 0) { - s->pending = incoming; - goto END_POS; - } - - // No overlap, emit pending, keep incoming - if (s->pending.pos + s->pending.length <= incoming.pos) { - if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { - ok = 0; - goto END; - } - s->pending = incoming; - goto END_POS; - } - - // Overlap: emit longest - if (incoming.length > s->pending.length) { - if (lzfse_backend_match(s, &incoming) != LZFSE_STATUS_OK) { - ok = 0; - goto END; - } - } else { - if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) { - ok = 0; - goto END; - } - } - s->pending = NO_MATCH; - - END_POS: - // We are done with this src_encode_i. - // Update state now (s->pending has already been updated). - *hashLine = newH; - } +/* =============================================================== + * Encoder front end + */ + +int lzfse_encode_base(lzfse_encoder_state *s) +{ + lzfse_history_set *history_table = s->history_table; + lzfse_history_set *hashLine = 0; + lzfse_history_set newH; + const lzfse_match NO_MATCH = { 0 }; + int ok = 1; + + memset(&newH, 0x00, sizeof(newH)); + + // 8 byte padding at end of buffer + s->src_encode_end = s->src_end - 8; + for (; s->src_encode_i < s->src_encode_end; s->src_encode_i++) { + lzfse_offset pos = s->src_encode_i; // pos >= 0 + + // Load 4 byte value and get hash line + uint32_t x = load4(s->src + pos); + hashLine = history_table + hashX(x); + lzfse_history_set h = *hashLine; + + // Prepare next hash line (component 0 is the most recent) to prepare new + // entries (stored later) + { + newH.pos[0] = (int32_t)pos; + for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) + newH.pos[k + 1] = h.pos[k]; + newH.value[0] = x; + for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH - 1; k++) + newH.value[k + 1] = h.value[k]; + } + + // Do not look for a match if we are still covered by a previous match + if (pos < s->src_literal) + goto END_POS; + + // Search best incoming match + lzfse_match incoming = { .pos = pos, .ref = 0, .length = 0 }; + + // Check for matches. We consider matches of length >= 4 only. 
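/*
 * Illustrative sketch, not part of the patch: the match-length extension used
 * in the search loop below, as a standalone helper. Eight bytes are compared
 * at a time; when the words differ, __builtin_ctzll on the non-zero XOR gives
 * the number of matching low-order bytes (this assumes a little-endian host,
 * like the load8-based loop below). memcpy loads avoid alignment assumptions,
 * and the byte-wise tail removes the need for readable padding past max_len.
 */
#include <stdint.h>
#include <string.h>

static uint32_t common_prefix_length(const uint8_t *a, const uint8_t *b,
				     uint32_t max_len)
{
	uint32_t len = 0;

	while (len + 8 <= max_len) {
		uint64_t wa, wb;

		memcpy(&wa, a + len, 8);
		memcpy(&wb, b + len, 8);
		if (wa == wb) {
			len += 8;
			continue;
		}
		// ctzll must only be called with a non-zero argument.
		return len + (uint32_t)(__builtin_ctzll(wa ^ wb) >> 3);
	}
	while (len < max_len && a[len] == b[len])
		len++;
	return len;
}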
+ for (int k = 0; k < LZFSE_ENCODE_HASH_WIDTH; k++) { + uint32_t d = h.value[k] ^ x; + if (d) + continue; // no 4 byte match + int32_t ref = h.pos[k]; + if (ref + LZFSE_ENCODE_MAX_D_VALUE < pos) + continue; // too far + + const uint8_t *src_ref = s->src + ref; + const uint8_t *src_pos = s->src + pos; + uint32_t length = 4; + uint32_t maxLength = + (uint32_t)(s->src_end - pos - + 8); // ensure we don't hit the end of SRC + while (length < maxLength) { + uint64_t d = load8(src_ref + length) ^ + load8(src_pos + length); + if (d == 0) { + length += 8; + continue; + } + + length += + (__builtin_ctzll(d) >> + 3); // ctzll must be called only with D != 0 + break; + } + if (length > incoming.length) { + incoming.length = length; + incoming.ref = ref; + } // keep if longer + } + + // No incoming match? + if (incoming.length == 0) { + // We may still want to emit some literals here, to not lag too far behind + // the current search point, and avoid + // ending up with a literal block not fitting in the state. + lzfse_offset n_literals = pos - s->src_literal; + // The threshold here should be larger than a couple of MAX_L_VALUE, and + // much smaller than LITERALS_PER_BLOCK + if (n_literals > 8 * LZFSE_ENCODE_MAX_L_VALUE) { + // Here, we need to consume some literals. Emit pending match if there + // is one + if (s->pending.length > 0) { + if (lzfse_backend_match(s, + &s->pending) != + LZFSE_STATUS_OK) { + ok = 0; + goto END; + } + s->pending = NO_MATCH; + } else { + // No pending match, emit a full LZFSE_ENCODE_MAX_L_VALUE block of + // literals + if (lzfse_backend_literals( + s, + LZFSE_ENCODE_MAX_L_VALUE) != + LZFSE_STATUS_OK) { + ok = 0; + goto END; + } + } + } + goto END_POS; // no incoming match + } + + // Limit match length (it may still be expanded backwards, but this is + // bounded by the limit on literals we tested before) + if (incoming.length > LZFSE_ENCODE_MAX_MATCH_LENGTH) { + incoming.length = LZFSE_ENCODE_MAX_MATCH_LENGTH; + } + + // Expand backwards (since this is expensive, we do this for the best match + // only) + while (incoming.pos > s->src_literal && incoming.ref > 0 && + s->src[incoming.ref - 1] == s->src[incoming.pos - 1]) { + incoming.pos--; + incoming.ref--; + } + incoming.length += + pos - incoming.pos; // update length after expansion + + // Match filtering heuristic (from LZVN). INCOMING is always defined here. + + // Incoming is 'good', emit incoming + if (incoming.length >= LZFSE_ENCODE_GOOD_MATCH) { + if (lzfse_backend_match(s, &incoming) != + LZFSE_STATUS_OK) { + ok = 0; + goto END; + } + s->pending = NO_MATCH; + goto END_POS; + } + + // No pending, keep incoming + if (s->pending.length == 0) { + s->pending = incoming; + goto END_POS; + } + + // No overlap, emit pending, keep incoming + if (s->pending.pos + s->pending.length <= incoming.pos) { + if (lzfse_backend_match(s, &s->pending) != + LZFSE_STATUS_OK) { + ok = 0; + goto END; + } + s->pending = incoming; + goto END_POS; + } + + // Overlap: emit longest + if (incoming.length > s->pending.length) { + if (lzfse_backend_match(s, &incoming) != + LZFSE_STATUS_OK) { + ok = 0; + goto END; + } + } else { + if (lzfse_backend_match(s, &s->pending) != + LZFSE_STATUS_OK) { + ok = 0; + goto END; + } + } + s->pending = NO_MATCH; + +END_POS: + // We are done with this src_encode_i. + // Update state now (s->pending has already been updated). + *hashLine = newH; + } END: - return ok ? LZFSE_STATUS_OK : LZFSE_STATUS_DST_FULL; + return ok ? 
LZFSE_STATUS_OK : LZFSE_STATUS_DST_FULL; } -int lzfse_encode_finish(lzfse_encoder_state *s) { - const lzfse_match NO_MATCH = {0}; - - // Emit pending match - if (s->pending.length > 0) { - if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) - return LZFSE_STATUS_DST_FULL; - s->pending = NO_MATCH; - } - - // Emit final literals if any - lzfse_offset L = s->src_end - s->src_literal; - if (L > 0) { - if (lzfse_backend_literals(s, L) != LZFSE_STATUS_OK) - return LZFSE_STATUS_DST_FULL; - } - - // Emit all matches, and end-of-stream block - if (lzfse_backend_end_of_stream(s) != LZFSE_STATUS_OK) - return LZFSE_STATUS_DST_FULL; - - return LZFSE_STATUS_OK; +int lzfse_encode_finish(lzfse_encoder_state *s) +{ + const lzfse_match NO_MATCH = { 0 }; + + // Emit pending match + if (s->pending.length > 0) { + if (lzfse_backend_match(s, &s->pending) != LZFSE_STATUS_OK) + return LZFSE_STATUS_DST_FULL; + s->pending = NO_MATCH; + } + + // Emit final literals if any + lzfse_offset L = s->src_end - s->src_literal; + if (L > 0) { + if (lzfse_backend_literals(s, L) != LZFSE_STATUS_OK) + return LZFSE_STATUS_DST_FULL; + } + + // Emit all matches, and end-of-stream block + if (lzfse_backend_end_of_stream(s) != LZFSE_STATUS_OK) + return LZFSE_STATUS_DST_FULL; + + return LZFSE_STATUS_OK; } diff --git a/lzfse/lzfse_encode_tables.h b/lzfse/lzfse_encode_tables.h index 81c9c70..da8daac 100644 --- a/lzfse/lzfse_encode_tables.h +++ b/lzfse/lzfse_encode_tables.h @@ -1,218 +1,224 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. 
+ * + */ #ifndef LZFSE_ENCODE_TABLES_H #define LZFSE_ENCODE_TABLES_H -#if defined(_MSC_VER) && !defined(__clang__) -# define inline __inline -#endif - -static inline uint8_t l_base_from_value(int32_t value) { - static const uint8_t sym[LZFSE_ENCODE_MAX_L_VALUE + 1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, - 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19}; - return sym[value]; +static inline uint8_t l_base_from_value(int32_t value) +{ + static const uint8_t sym[LZFSE_ENCODE_MAX_L_VALUE + 1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19 + }; + return sym[value]; } -static inline uint8_t m_base_from_value(int32_t value) { - static const uint8_t sym[LZFSE_ENCODE_MAX_M_VALUE + 1] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, - 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, - 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 
18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, - 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, - 19, 19}; - return sym[value]; +static inline uint8_t m_base_from_value(int32_t value) +{ + static const uint8_t sym[LZFSE_ENCODE_MAX_M_VALUE + 1] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 
18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 
19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 19, 19, 19, 19 + }; + return sym[value]; } -static inline uint8_t d_base_from_value(int32_t value) { - static const uint8_t sym[64 * 4] = { - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, - 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, - 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 17, 18, 19, 20, 20, 21, 21, - 22, 22, 23, 23, 24, 24, 24, 24, 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, - 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, - 30, 30, 30, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, - 32, 32, 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 40, - 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, - 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, - 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 49, 50, 51, 52, 52, - 53, 53, 54, 54, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, 58, 58, 58, - 59, 59, 59, 59, 60, 60, 60, 60, 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, - 61, 61, 62, 62, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, - 0, 0, 0, 0}; - int index = 0; - int in_range_k; - in_range_k = (value >= 0 && value < 60); - index |= (((value - 0) >> 0) + 0) & -in_range_k; - in_range_k = (value >= 60 && value < 1020); - index |= (((value - 60) >> 4) + 64) & -in_range_k; - in_range_k = (value >= 1020 && value < 16380); - index |= (((value - 1020) >> 8) + 128) & -in_range_k; - in_range_k = (value >= 16380 && value < 262140); - index |= (((value - 16380) >> 12) + 192) & -in_range_k; - return sym[index & 255]; +static inline uint8_t d_base_from_value(int32_t value) +{ + static const uint8_t sym[64 * 4] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, + 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, + 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, + 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 24, 24, + 25, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 28, 28, 28, 28, + 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 
+ 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 32, 32, 32, 32, + 32, 33, 34, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 40, 40, + 41, 41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 43, 44, 44, 44, 44, + 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, + 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, + 48, 49, 50, 51, 52, 52, 53, 53, 54, 54, 55, 55, 56, 56, 56, 56, + 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, + 60, 60, 60, 60, 61, 61, 61, 61, 61, 61, 61, 61, 62, 62, 62, 62, + 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, 63, 63, 0, 0, 0, 0 + }; + int index = 0; + int in_range_k; + in_range_k = (value >= 0 && value < 60); + index |= (((value - 0) >> 0) + 0) & -in_range_k; + in_range_k = (value >= 60 && value < 1020); + index |= (((value - 60) >> 4) + 64) & -in_range_k; + in_range_k = (value >= 1020 && value < 16380); + index |= (((value - 1020) >> 8) + 128) & -in_range_k; + in_range_k = (value >= 16380 && value < 262140); + index |= (((value - 16380) >> 12) + 192) & -in_range_k; + return sym[index & 255]; } #endif // LZFSE_ENCODE_TABLES_H diff --git a/lzfse/lzfse_fse.c b/lzfse/lzfse_fse.c index 2bf37a6..50329cc 100644 --- a/lzfse/lzfse_fse.c +++ b/lzfse/lzfse_fse.c @@ -1,217 +1,214 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ #include "lzfse_internal.h" -// Initialize encoder table T[NSYMBOLS]. -// NSTATES = sum FREQ[i] is the number of states (a power of 2) -// NSYMBOLS is the number of symbols. -// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] -// >= 0. -// Some symbols may have a 0 frequency. In that case, they should not be -// present in the data. +/* Initialize encoder table T[NSYMBOLS]. + * NSTATES = sum FREQ[i] is the number of states (a power of 2) + * NSYMBOLS is the number of symbols. + * FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] + * >= 0. + * Some symbols may have a 0 frequency. 
In that case, they should not be + * present in the data. + */ void fse_init_encoder_table(int nstates, int nsymbols, - const uint16_t *__restrict freq, - fse_encoder_entry *__restrict t) { - int offset = 0; // current offset - int n_clz = __builtin_clz(nstates); - int i; - for (i = 0; i < nsymbols; i++) { - int f = (int)freq[i]; - int k; - if (f == 0) - continue; // skip this symbol, no occurrences - k = __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<<K) < 2*N - t[i].s0 = (int16_t)((f << k) - nstates); - t[i].k = (int16_t)k; - t[i].delta0 = (int16_t)(offset - f + (nstates >> k)); - t[i].delta1 = (int16_t)(offset - f + (nstates >> (k - 1))); - offset += f; - } + const uint16_t *__restrict freq, + fse_encoder_entry *__restrict t) +{ + int offset = 0; // current offset + int n_clz = __builtin_clz(nstates); + int i; + for (i = 0; i < nsymbols; i++) { + int f = (int)freq[i]; + int k; + if (f == 0) + continue; // skip this symbol, no occurrences + k = __builtin_clz(f) - + n_clz; // shift needed to ensure N <= (F<<K) < 2*N + t[i].s0 = (int16_t)((f << k) - nstates); + t[i].k = (int16_t)k; + t[i].delta0 = (int16_t)(offset - f + (nstates >> k)); + t[i].delta1 = (int16_t)(offset - f + (nstates >> (k - 1))); + offset += f; + } } -// Initialize decoder table T[NSTATES]. -// NSTATES = sum FREQ[i] is the number of states (a power of 2) -// NSYMBOLS is the number of symbols. -// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] ->= 0. -// Some symbols may have a 0 frequency. In that case, they should not be -// present in the data. +/* Initialize decoder table T[NSTATES]. + * NSTATES = sum FREQ[i] is the number of states (a power of 2) + * NSYMBOLS is the number of symbols. + * FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] + * >= 0. + * Some symbols may have a 0 frequency. In that case, they should not be + * present in the data. + */ int fse_init_decoder_table(int nstates, int nsymbols, - const uint16_t *__restrict freq, - int32_t *__restrict t) { - int n_clz = __builtin_clz(nstates); - int sum_of_freq = 0; - int i, j0, j; - for (i = 0; i < nsymbols; i++) { - int f = (int)freq[i]; - int k; - if (f == 0) - continue; // skip this symbol, no occurrences - - sum_of_freq += f; - - if (sum_of_freq > nstates) { - return -1; - } - - k = __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<<K) < 2*N - j0 = ((2 * nstates) >> k) - f; - - // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F - for (j = 0; j < f; j++) { - fse_decoder_entry e; - - e.symbol = (uint8_t)i; - if (j < j0) { - e.k = (int8_t)k; - e.delta = (int16_t)(((f + j) << k) - nstates); - } else { - e.k = (int8_t)(k - 1); - e.delta = (int16_t)((j - j0) << (k - 1)); - } - - memcpy(t, &e, sizeof(e)); - t++; - } - } - - return 0; // OK + const uint16_t *__restrict freq, + int32_t *__restrict t) +{ + int n_clz = __builtin_clz(nstates); + int sum_of_freq = 0; + int i, j0, j; + for (i = 0; i < nsymbols; i++) { + int f = (int)freq[i]; + int k; + if (f == 0) + continue; // skip this symbol, no occurrences + + sum_of_freq += f; + + if (sum_of_freq > nstates) { + return -1; + } + + k = __builtin_clz(f) - + n_clz; // shift needed to ensure N <= (F<<K) < 2*N + j0 = ((2 * nstates) >> k) - f; + + // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F + for (j = 0; j < f; j++) { + fse_decoder_entry e; + + e.symbol = (uint8_t)i; + if (j < j0) { + e.k = (int8_t)k; + e.delta = (int16_t)(((f + j) << k) - nstates); + } else { + e.k = (int8_t)(k - 1); + e.delta = (int16_t)((j - j0) << (k - 1)); + } + + memcpy(t, &e, sizeof(e)); + t++; + } + } + + return 0; // OK } -// Initialize value decoder table T[NSTATES]. -// NSTATES = sum FREQ[i] is the number of states (a power of 2) -// NSYMBOLS is the number of symbols.
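To illustrate the shift that both table initializers compute: with nstates = 64 and a normalized frequency f = 12, k = __builtin_clz(12) - __builtin_clz(64) = 28 - 25 = 3, and the invariant from the comment holds, since 64 <= (12 << 3) = 96 < 128. A minimal sketch of that check, assuming a GCC/Clang toolchain for __builtin_clz; the helper name is hypothetical:

#include <assert.h>

/* Sketch: restate the N <= (F << K) < 2*N invariant for one concrete case. */
static void check_fse_shift_example(void)
{
        int nstates = 64; /* number of states, a power of 2 */
        int f = 12;       /* normalized frequency of one symbol */
        int k = __builtin_clz(f) - __builtin_clz(nstates); /* 28 - 25 = 3 */

        assert(nstates <= (f << k) && (f << k) < 2 * nstates);
}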
-// FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] ->= 0. -// SYMBOL_VBITS[NSYMBOLS] and SYMBOLS_VBASE[NSYMBOLS] are the number of value -// bits to read and the base value for each symbol. -// Some symbols may have a 0 frequency. In that case, they should not be -// present in the data. +/* Initialize value decoder table T[NSTATES]. + * NSTATES = sum FREQ[i] is the number of states (a power of 2) + * NSYMBOLS is the number of symbols. + * FREQ[NSYMBOLS] is a normalized histogram of symbol frequencies, with FREQ[i] + * >= 0. + * SYMBOL_VBITS[NSYMBOLS] and SYMBOLS_VBASE[NSYMBOLS] are the number of value + * bits to read and the base value for each symbol. + * Some symbols may have a 0 frequency. In that case, they should not be + * present in the data. + */ void fse_init_value_decoder_table(int nstates, int nsymbols, - const uint16_t *__restrict freq, - const uint8_t *__restrict symbol_vbits, - const int32_t *__restrict symbol_vbase, - fse_value_decoder_entry *__restrict t) { - int n_clz = __builtin_clz(nstates); - int i; - for (i = 0; i < nsymbols; i++) { - fse_value_decoder_entry ei = {0}; - int f = (int)freq[i]; - int k, j0, j; - if (f == 0) - continue; // skip this symbol, no occurrences - - k = __builtin_clz(f) - n_clz; // shift needed to ensure N <= (F<<K) < 2*N - j0 = ((2 * nstates) >> k) - f; - - ei.value_bits = symbol_vbits[i]; - ei.vbase = symbol_vbase[i]; - - // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F - for (j = 0; j < f; j++) { - fse_value_decoder_entry e = ei; - - if (j < j0) { - e.total_bits = (uint8_t)k + e.value_bits; - e.delta = (int16_t)(((f + j) << k) - nstates); - } else { - e.total_bits = (uint8_t)(k - 1) + e.value_bits; - e.delta = (int16_t)((j - j0) << (k - 1)); - } - - memcpy(t, &e, 8); - t++; - } - } + const uint16_t *__restrict freq, + const uint8_t *__restrict symbol_vbits, + const int32_t *__restrict symbol_vbase, + fse_value_decoder_entry *__restrict t) +{ + int n_clz = __builtin_clz(nstates); + int i; + for (i = 0; i < nsymbols; i++) { + fse_value_decoder_entry ei = { 0 }; + int f = (int)freq[i]; + int k, j0, j; + if (f == 0) + continue; // skip this symbol, no occurrences + + k = __builtin_clz(f) - + n_clz; // shift needed to ensure N <= (F<<K) < 2*N + j0 = ((2 * nstates) >> k) - f; + + ei.value_bits = symbol_vbits[i]; + ei.vbase = symbol_vbase[i]; + + // Initialize all states S reached by this symbol: OFFSET <= S < OFFSET + F + for (j = 0; j < f; j++) { + fse_value_decoder_entry e = ei; + + if (j < j0) { + e.total_bits = (uint8_t)k + e.value_bits; + e.delta = (int16_t)(((f + j) << k) - nstates); + } else { + e.total_bits = (uint8_t)(k - 1) + e.value_bits; + e.delta = (int16_t)((j - j0) << (k - 1)); + } + + memcpy(t, &e, 8); + t++; + } + } } -// Remove states from symbols until the correct number of states is used. -static void fse_adjust_freqs(uint16_t *freq, int overrun, int nsymbols) { - int shift; - for (shift = 3; overrun != 0; shift--) { - int sym; - for (sym = 0; sym < nsymbols; sym++) { - if (freq[sym] > 1) { - int n = (freq[sym] - 1) >> shift; - if (n > overrun) - n = overrun; - freq[sym] -= n; - overrun -= n; - if (overrun == 0) - break; - } - } - } +/* Remove states from symbols until the correct number of states is used.
+ */ +static void fse_adjust_freqs(uint16_t *freq, int overrun, int nsymbols) +{ + int shift; + for (shift = 3; overrun != 0; shift--) { + int sym; + for (sym = 0; sym < nsymbols; sym++) { + if (freq[sym] > 1) { + int n = (freq[sym] - 1) >> shift; + if (n > overrun) + n = overrun; + freq[sym] -= n; + overrun -= n; + if (overrun == 0) + break; + } + } + } } -// Normalize a table T[NSYMBOLS] of occurrences to FREQ[NSYMBOLS]. +/* Normalize a table T[NSYMBOLS] of occurrences to FREQ[NSYMBOLS]. + */ void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, - uint16_t *__restrict freq) { - uint32_t s_count = 0; - int remaining = nstates; // must be signed; this may become < 0 - int max_freq = 0; - int max_freq_sym = 0; - int shift = __builtin_clz(nstates) - 1; - uint32_t highprec_step; - int i; - - // Compute the total number of symbol occurrences - for (i = 0; i < nsymbols; i++) - s_count += t[i]; - - if (s_count == 0) - highprec_step = 0; // no symbols used - else - highprec_step = ((uint32_t)1 << 31) / s_count; - - for (i = 0; i < nsymbols; i++) { - - // Rescale the occurrence count to get the normalized frequency. - // Round up if the fractional part is >= 0.5; otherwise round down. - // For efficiency, we do this calculation using integer arithmetic. - int f = (((t[i] * highprec_step) >> shift) + 1) >> 1; - - // If a symbol was used, it must be given a nonzero normalized frequency. - if (f == 0 && t[i] != 0) - f = 1; - - freq[i] = f; - remaining -= f; - - // Remember the maximum frequency and which symbol had it. - if (f > max_freq) { - max_freq = f; - max_freq_sym = i; - } - } - - // If there remain states to be assigned, then just assign them to the most - // frequent symbol. Alternatively, if we assigned more states than were - // actually available, then either remove states from the most frequent symbol - // (for minor overruns) or use the slower adjustment algorithm (for major - // overruns). - if (-remaining < (max_freq >> 2)) { - freq[max_freq_sym] += remaining; - } else { - fse_adjust_freqs(freq, -remaining, nsymbols); - } + uint16_t *__restrict freq) +{ + uint32_t s_count = 0; + int remaining = nstates; // must be signed; this may become < 0 + int max_freq = 0; + int max_freq_sym = 0; + int shift = __builtin_clz(nstates) - 1; + uint32_t highprec_step; + int i; + + // Compute the total number of symbol occurrences + for (i = 0; i < nsymbols; i++) + s_count += t[i]; + + if (s_count == 0) + highprec_step = 0; // no symbols used + else + highprec_step = ((uint32_t)1 << 31) / s_count; + + for (i = 0; i < nsymbols; i++) { + // Rescale the occurrence count to get the normalized frequency. + // Round up if the fractional part is >= 0.5; otherwise round down. + // For efficiency, we do this calculation using integer arithmetic. + int f = (((t[i] * highprec_step) >> shift) + 1) >> 1; + + // If a symbol was used, it must be given a nonzero normalized frequency. + if (f == 0 && t[i] != 0) + f = 1; + + freq[i] = f; + remaining -= f; + + // Remember the maximum frequency and which symbol had it. + if (f > max_freq) { + max_freq = f; + max_freq_sym = i; + } + } + + // If there remain states to be assigned, then just assign them to the most + // frequent symbol. Alternatively, if we assigned more states than were + // actually available, then either remove states from the most frequent symbol + // (for minor overruns) or use the slower adjustment algorithm (for major + // overruns). 
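To illustrate the normalization performed by fse_normalize_freq: raw counts {3, 1, 0, 12} over nstates = 64 rescale to {12, 4, 0, 48}; these are the exact proportional shares, they already sum to 64, so neither the fast fix-up below nor fse_adjust_freqs has to change anything. A minimal sketch of a caller, assuming the fse_normalize_freq prototype is visible through lzfse_internal.h as it is for lzfse_fse.c:

#include <stdint.h>
#include <stdio.h>
#include "lzfse_internal.h" /* assumed to provide the fse_normalize_freq prototype */

/* Sketch: normalize a 4-symbol histogram to 64 states; prints 12 4 0 48. */
int main(void)
{
        const uint32_t counts[4] = { 3, 1, 0, 12 };
        uint16_t freq[4] = { 0 };
        int i;

        fse_normalize_freq(64, 4, counts, freq);
        for (i = 0; i < 4; i++)
                printf("%d ", (int)freq[i]);
        printf("\n");
        return 0;
}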
+ if (-remaining < (max_freq >> 2)) { + freq[max_freq_sym] += remaining; + } else { + fse_adjust_freqs(freq, -remaining, nsymbols); + } } diff --git a/lzfse/lzfse_fse.h b/lzfse/lzfse_fse.h index 58dd724..c6a5e43 100644 --- a/lzfse/lzfse_fse.h +++ b/lzfse/lzfse_fse.h @@ -1,51 +1,30 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -// Finite state entropy coding (FSE) -// This is an implementation of the tANS algorithm described by Jarek Duda, -// we use the more descriptive name "Finite State Entropy". +/* Finite state entropy coding (FSE) + * This is an implementation of the tANS algorithm described by Jarek Duda, + * we use the more descriptive name "Finite State Entropy". + */ -#pragma once +#ifndef LZFSE_FSE_H +#define LZFSE_FSE_H #include #include -// Select between 32/64-bit I/O streams for FSE. Note that the FSE stream -// size need not match the word size of the machine, but in practice you -// want to use 64b streams on 64b systems for better performance. -#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) -#define FSE_IOSTREAM_64 1 -#else +/* Select between 32/64-bit I/O streams for FSE. Note that the FSE stream + * size need not match the word size of the machine, but in practice you + * want to use 64b streams on 64b systems for better performance. + */ #define FSE_IOSTREAM_64 0 -#endif -#if defined(_MSC_VER) && !defined(__clang__) -# define FSE_INLINE __forceinline -# define inline __inline -# pragma warning(disable : 4068) // warning C4068: unknown pragma -#else -# define FSE_INLINE static inline __attribute__((__always_inline__)) -#endif +#define FSE_INLINE static inline __attribute__((__always_inline__)) -// MARK: - Bit utils +/* MARK: - Bit utils + */ /*! @abstract Signed type used to represent bit count. */ typedef int32_t fse_bit_count; @@ -53,121 +32,146 @@ typedef int32_t fse_bit_count; /*! @abstract Unsigned type used to represent FSE state. */ typedef uint16_t fse_state; -// Mask the NBITS lsb of X. 
0 <= NBITS < 64 -static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) { - static const uint64_t mtable[65] = { - 0x0000000000000000LLU, 0x0000000000000001LLU, 0x0000000000000003LLU, - 0x0000000000000007LLU, 0x000000000000000fLLU, 0x000000000000001fLLU, - 0x000000000000003fLLU, 0x000000000000007fLLU, 0x00000000000000ffLLU, - 0x00000000000001ffLLU, 0x00000000000003ffLLU, 0x00000000000007ffLLU, - 0x0000000000000fffLLU, 0x0000000000001fffLLU, 0x0000000000003fffLLU, - 0x0000000000007fffLLU, 0x000000000000ffffLLU, 0x000000000001ffffLLU, - 0x000000000003ffffLLU, 0x000000000007ffffLLU, 0x00000000000fffffLLU, - 0x00000000001fffffLLU, 0x00000000003fffffLLU, 0x00000000007fffffLLU, - 0x0000000000ffffffLLU, 0x0000000001ffffffLLU, 0x0000000003ffffffLLU, - 0x0000000007ffffffLLU, 0x000000000fffffffLLU, 0x000000001fffffffLLU, - 0x000000003fffffffLLU, 0x000000007fffffffLLU, 0x00000000ffffffffLLU, - 0x00000001ffffffffLLU, 0x00000003ffffffffLLU, 0x00000007ffffffffLLU, - 0x0000000fffffffffLLU, 0x0000001fffffffffLLU, 0x0000003fffffffffLLU, - 0x0000007fffffffffLLU, 0x000000ffffffffffLLU, 0x000001ffffffffffLLU, - 0x000003ffffffffffLLU, 0x000007ffffffffffLLU, 0x00000fffffffffffLLU, - 0x00001fffffffffffLLU, 0x00003fffffffffffLLU, 0x00007fffffffffffLLU, - 0x0000ffffffffffffLLU, 0x0001ffffffffffffLLU, 0x0003ffffffffffffLLU, - 0x0007ffffffffffffLLU, 0x000fffffffffffffLLU, 0x001fffffffffffffLLU, - 0x003fffffffffffffLLU, 0x007fffffffffffffLLU, 0x00ffffffffffffffLLU, - 0x01ffffffffffffffLLU, 0x03ffffffffffffffLLU, 0x07ffffffffffffffLLU, - 0x0fffffffffffffffLLU, 0x1fffffffffffffffLLU, 0x3fffffffffffffffLLU, - 0x7fffffffffffffffLLU, 0xffffffffffffffffLLU, - }; - return x & mtable[nbits]; +/* Mask the NBITS lsb of X. 0 <= NBITS < 64 + */ +static inline uint64_t fse_mask_lsb64(uint64_t x, fse_bit_count nbits) +{ + static const uint64_t mtable[65] = { + 0x0000000000000000LLU, 0x0000000000000001LLU, + 0x0000000000000003LLU, 0x0000000000000007LLU, + 0x000000000000000fLLU, 0x000000000000001fLLU, + 0x000000000000003fLLU, 0x000000000000007fLLU, + 0x00000000000000ffLLU, 0x00000000000001ffLLU, + 0x00000000000003ffLLU, 0x00000000000007ffLLU, + 0x0000000000000fffLLU, 0x0000000000001fffLLU, + 0x0000000000003fffLLU, 0x0000000000007fffLLU, + 0x000000000000ffffLLU, 0x000000000001ffffLLU, + 0x000000000003ffffLLU, 0x000000000007ffffLLU, + 0x00000000000fffffLLU, 0x00000000001fffffLLU, + 0x00000000003fffffLLU, 0x00000000007fffffLLU, + 0x0000000000ffffffLLU, 0x0000000001ffffffLLU, + 0x0000000003ffffffLLU, 0x0000000007ffffffLLU, + 0x000000000fffffffLLU, 0x000000001fffffffLLU, + 0x000000003fffffffLLU, 0x000000007fffffffLLU, + 0x00000000ffffffffLLU, 0x00000001ffffffffLLU, + 0x00000003ffffffffLLU, 0x00000007ffffffffLLU, + 0x0000000fffffffffLLU, 0x0000001fffffffffLLU, + 0x0000003fffffffffLLU, 0x0000007fffffffffLLU, + 0x000000ffffffffffLLU, 0x000001ffffffffffLLU, + 0x000003ffffffffffLLU, 0x000007ffffffffffLLU, + 0x00000fffffffffffLLU, 0x00001fffffffffffLLU, + 0x00003fffffffffffLLU, 0x00007fffffffffffLLU, + 0x0000ffffffffffffLLU, 0x0001ffffffffffffLLU, + 0x0003ffffffffffffLLU, 0x0007ffffffffffffLLU, + 0x000fffffffffffffLLU, 0x001fffffffffffffLLU, + 0x003fffffffffffffLLU, 0x007fffffffffffffLLU, + 0x00ffffffffffffffLLU, 0x01ffffffffffffffLLU, + 0x03ffffffffffffffLLU, 0x07ffffffffffffffLLU, + 0x0fffffffffffffffLLU, 0x1fffffffffffffffLLU, + 0x3fffffffffffffffLLU, 0x7fffffffffffffffLLU, + 0xffffffffffffffffLLU, + }; + return x & mtable[nbits]; } -// Mask the NBITS lsb of X. 
0 <= NBITS < 32 -static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) { - static const uint32_t mtable[33] = { - 0x0000000000000000U, 0x0000000000000001U, 0x0000000000000003U, - 0x0000000000000007U, 0x000000000000000fU, 0x000000000000001fU, - 0x000000000000003fU, 0x000000000000007fU, 0x00000000000000ffU, - 0x00000000000001ffU, 0x00000000000003ffU, 0x00000000000007ffU, - 0x0000000000000fffU, 0x0000000000001fffU, 0x0000000000003fffU, - 0x0000000000007fffU, 0x000000000000ffffU, 0x000000000001ffffU, - 0x000000000003ffffU, 0x000000000007ffffU, 0x00000000000fffffU, - 0x00000000001fffffU, 0x00000000003fffffU, 0x00000000007fffffU, - 0x0000000000ffffffU, 0x0000000001ffffffU, 0x0000000003ffffffU, - 0x0000000007ffffffU, 0x000000000fffffffU, 0x000000001fffffffU, - 0x000000003fffffffU, 0x000000007fffffffU, 0x00000000ffffffffU, - }; - return x & mtable[nbits]; +/* Mask the NBITS lsb of X. 0 <= NBITS < 32 + */ +static inline uint32_t fse_mask_lsb32(uint32_t x, fse_bit_count nbits) +{ + static const uint32_t mtable[33] = { + 0x0000000000000000U, 0x0000000000000001U, 0x0000000000000003U, + 0x0000000000000007U, 0x000000000000000fU, 0x000000000000001fU, + 0x000000000000003fU, 0x000000000000007fU, 0x00000000000000ffU, + 0x00000000000001ffU, 0x00000000000003ffU, 0x00000000000007ffU, + 0x0000000000000fffU, 0x0000000000001fffU, 0x0000000000003fffU, + 0x0000000000007fffU, 0x000000000000ffffU, 0x000000000001ffffU, + 0x000000000003ffffU, 0x000000000007ffffU, 0x00000000000fffffU, + 0x00000000001fffffU, 0x00000000003fffffU, 0x00000000007fffffU, + 0x0000000000ffffffU, 0x0000000001ffffffU, 0x0000000003ffffffU, + 0x0000000007ffffffU, 0x000000000fffffffU, 0x000000001fffffffU, + 0x000000003fffffffU, 0x000000007fffffffU, 0x00000000ffffffffU, + }; + return x & mtable[nbits]; } /*! @abstract Select \c nbits at index \c start from \c x. * 0 <= start <= start+nbits <= 64 */ FSE_INLINE uint64_t fse_extract_bits64(uint64_t x, fse_bit_count start, - fse_bit_count nbits) { + fse_bit_count nbits) +{ #if defined(__GNUC__) - // If START and NBITS are constants, map to bit-field extraction instructions - if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) - return (x >> start) & ((1LLU << nbits) - 1LLU); + // If START and NBITS are constants, map to bit-field extraction instructions + if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) + return (x >> start) & ((1LLU << nbits) - 1LLU); #endif - // Otherwise, shift and mask - return fse_mask_lsb64(x >> start, nbits); + // Otherwise, shift and mask + return fse_mask_lsb64(x >> start, nbits); } /*! @abstract Select \c nbits at index \c start from \c x. 
* 0 <= start <= start+nbits <= 32 */ FSE_INLINE uint32_t fse_extract_bits32(uint32_t x, fse_bit_count start, - fse_bit_count nbits) { + fse_bit_count nbits) +{ #if defined(__GNUC__) - // If START and NBITS are constants, map to bit-field extraction instructions - if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) - return (x >> start) & ((1U << nbits) - 1U); + // If START and NBITS are constants, map to bit-field extraction instructions + if (__builtin_constant_p(start) && __builtin_constant_p(nbits)) + return (x >> start) & ((1U << nbits) - 1U); #endif - // Otherwise, shift and mask - return fse_mask_lsb32(x >> start, nbits); + // Otherwise, shift and mask + return fse_mask_lsb32(x >> start, nbits); } -// MARK: - Bit stream +/* MARK: - Bit stream + */ -// I/O streams -// The streams can be shared between several FSE encoders/decoders, which is why -// they are not in the state struct +/* I/O streams + * The streams can be shared between several FSE encoders/decoders, which is why + * they are not in the state struct + */ /*! @abstract Output stream, 64-bit accum. */ typedef struct { - uint64_t accum; // Output bits - fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 + uint64_t accum; // Output bits + fse_bit_count + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 } fse_out_stream64; /*! @abstract Output stream, 32-bit accum. */ typedef struct { - uint32_t accum; // Output bits - fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 + uint32_t accum; // Output bits + fse_bit_count + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 } fse_out_stream32; /*! @abstract Object representing an input stream. */ typedef struct { - uint64_t accum; // Input bits - fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 + uint64_t accum; // Input bits + fse_bit_count + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 } fse_in_stream64; /*! @abstract Object representing an input stream. */ typedef struct { - uint32_t accum; // Input bits - fse_bit_count accum_nbits; // Number of valid bits in ACCUM, other bits are 0 + uint32_t accum; // Input bits + fse_bit_count + accum_nbits; // Number of valid bits in ACCUM, other bits are 0 } fse_in_stream32; /*! @abstract Initialize an output stream object. */ -FSE_INLINE void fse_out_init64(fse_out_stream64 *s) { - s->accum = 0; - s->accum_nbits = 0; +FSE_INLINE void fse_out_init64(fse_out_stream64 *s) +{ + s->accum = 0; + s->accum_nbits = 0; } /*! @abstract Initialize an output stream object. */ -FSE_INLINE void fse_out_init32(fse_out_stream32 *s) { - s->accum = 0; - s->accum_nbits = 0; +FSE_INLINE void fse_out_init32(fse_out_stream32 *s) +{ + s->accum = 0; + s->accum_nbits = 0; } /*! @abstract Write full bytes from the accumulator to output buffer, ensuring @@ -175,17 +179,18 @@ FSE_INLINE void fse_out_init32(fse_out_stream32 *s) { * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all * cases. * @note *pbuf is incremented by the number of written bytes. 
*/ -FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) { - fse_bit_count nbits = - s->accum_nbits & -8; // number of bits written, multiple of 8 - - // Write 8 bytes of current accumulator - memcpy(*pbuf, &(s->accum), 8); - *pbuf += (nbits >> 3); // bytes - - // Update state - s->accum >>= nbits; // remove nbits - s->accum_nbits -= nbits; +FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) +{ + fse_bit_count nbits = s->accum_nbits & + -8; // number of bits written, multiple of 8 + + // Write 8 bytes of current accumulator + memcpy(*pbuf, &(s->accum), 8); + *pbuf += (nbits >> 3); // bytes + + // Update state + s->accum >>= nbits; // remove nbits + s->accum_nbits -= nbits; } /*! @abstract Write full bytes from the accumulator to output buffer, ensuring @@ -193,17 +198,18 @@ FSE_INLINE void fse_out_flush64(fse_out_stream64 *s, uint8_t **pbuf) { * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all * cases. * @note *pbuf is incremented by the number of written bytes. */ -FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) { - fse_bit_count nbits = - s->accum_nbits & -8; // number of bits written, multiple of 8 - - // Write 4 bytes of current accumulator - memcpy(*pbuf, &(s->accum), 4); - *pbuf += (nbits >> 3); // bytes - - // Update state - s->accum >>= nbits; // remove nbits - s->accum_nbits -= nbits; +FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) +{ + fse_bit_count nbits = s->accum_nbits & + -8; // number of bits written, multiple of 8 + + // Write 4 bytes of current accumulator + memcpy(*pbuf, &(s->accum), 4); + *pbuf += (nbits >> 3); // bytes + + // Update state + s->accum >>= nbits; // remove nbits + s->accum_nbits -= nbits; } /*! @abstract Write the last bytes from the accumulator to output buffer, @@ -211,17 +217,18 @@ FSE_INLINE void fse_out_flush32(fse_out_stream32 *s, uint8_t **pbuf) { * We assume we can write 8 bytes to the output buffer \c (*pbuf[0..7]) in all * cases. * @note *pbuf is incremented by the number of written bytes. */ -FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) { - fse_bit_count nbits = - (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8 - - // Write 8 bytes of current accumulator - memcpy(*pbuf, &(s->accum), 8); - *pbuf += (nbits >> 3); // bytes - - // Update state - s->accum = 0; // remove nbits - s->accum_nbits -= nbits; +FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) +{ + fse_bit_count nbits = (s->accum_nbits + 7) & + -8; // number of bits written, multiple of 8 + + // Write 8 bytes of current accumulator + memcpy(*pbuf, &(s->accum), 8); + *pbuf += (nbits >> 3); // bytes + + // Update state + s->accum = 0; // remove nbits + s->accum_nbits -= nbits; } /*! @abstract Write the last bytes from the accumulator to output buffer, @@ -229,37 +236,38 @@ FSE_INLINE void fse_out_finish64(fse_out_stream64 *s, uint8_t **pbuf) { * We assume we can write 4 bytes to the output buffer \c (*pbuf[0..3]) in all * cases. * @note *pbuf is incremented by the number of written bytes. 
*/ -FSE_INLINE void fse_out_finish32(fse_out_stream32 *s, uint8_t **pbuf) { - fse_bit_count nbits = - (s->accum_nbits + 7) & -8; // number of bits written, multiple of 8 - - // Write 8 bytes of current accumulator - memcpy(*pbuf, &(s->accum), 4); - *pbuf += (nbits >> 3); // bytes - - // Update state - s->accum = 0; // remove nbits - s->accum_nbits -= nbits; +FSE_INLINE void fse_out_finish32(fse_out_stream32 *s, uint8_t **pbuf) +{ + fse_bit_count nbits = (s->accum_nbits + 7) & + -8; // number of bits written, multiple of 8 + + // Write 8 bytes of current accumulator + memcpy(*pbuf, &(s->accum), 4); + *pbuf += (nbits >> 3); // bytes + + // Update state + s->accum = 0; // remove nbits + s->accum_nbits -= nbits; } /*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: * 0 <= b < 2^n, and N + s->accum_nbits <= 64. * @note The caller must ensure out_flush is called \b before the accumulator * overflows to more than 64 bits. */ -FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n, - uint64_t b) { - s->accum |= b << s->accum_nbits; - s->accum_nbits += n; +FSE_INLINE void fse_out_push64(fse_out_stream64 *s, fse_bit_count n, uint64_t b) +{ + s->accum |= b << s->accum_nbits; + s->accum_nbits += n; } /*! @abstract Accumulate \c n bits \c b to output stream \c s. We \b must have: * 0 <= n < 2^n, and n + s->accum_nbits <= 32. * @note The caller must ensure out_flush is called \b before the accumulator * overflows to more than 32 bits. */ -FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n, - uint32_t b) { - s->accum |= b << s->accum_nbits; - s->accum_nbits += n; +FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n, uint32_t b) +{ + s->accum |= b << s->accum_nbits; + s->accum_nbits += n; } #define DEBUG_CHECK_INPUT_STREAM_PARAMETERS @@ -271,59 +279,61 @@ FSE_INLINE void fse_out_push32(fse_out_stream32 *s, fse_bit_count n, * operations. This is why we have the special case for n == 0 (in which case * we want to load only 7 bytes instead of 8). */ FSE_INLINE int fse_in_checked_init64(fse_in_stream64 *s, fse_bit_count n, - const uint8_t **pbuf, - const uint8_t *buf_start) { - if (n) { - if (*pbuf < buf_start + 8) - return -1; // out of range - *pbuf -= 8; - memcpy(&(s->accum), *pbuf, 8); - s->accum_nbits = n + 64; - } else { - if (*pbuf < buf_start + 7) - return -1; // out of range - *pbuf -= 7; - memcpy(&(s->accum), *pbuf, 7); - s->accum &= 0xffffffffffffff; - s->accum_nbits = n + 56; - } - - if ((s->accum_nbits < 56 || s->accum_nbits >= 64) || - ((s->accum >> s->accum_nbits) != 0)) { - return -1; // the incoming input is wrong (encoder should have zeroed the - // upper bits) - } - - return 0; // OK + const uint8_t **pbuf, + const uint8_t *buf_start) +{ + if (n) { + if (*pbuf < buf_start + 8) + return -1; // out of range + *pbuf -= 8; + memcpy(&(s->accum), *pbuf, 8); + s->accum_nbits = n + 64; + } else { + if (*pbuf < buf_start + 7) + return -1; // out of range + *pbuf -= 7; + memcpy(&(s->accum), *pbuf, 7); + s->accum &= 0xffffffffffffff; + s->accum_nbits = n + 56; + } + + if ((s->accum_nbits < 56 || s->accum_nbits >= 64) || + ((s->accum >> s->accum_nbits) != 0)) { + return -1; // the incoming input is wrong (encoder should have zeroed the + // upper bits) + } + + return 0; // OK } /*! @abstract Identical to previous function, but for 32-bit operation * (resulting bit count is between 24 and 31 bits). 
*/ FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n, - const uint8_t **pbuf, - const uint8_t *buf_start) { - if (n) { - if (*pbuf < buf_start + 4) - return -1; // out of range - *pbuf -= 4; - memcpy(&(s->accum), *pbuf, 4); - s->accum_nbits = n + 32; - } else { - if (*pbuf < buf_start + 3) - return -1; // out of range - *pbuf -= 3; - memcpy(&(s->accum), *pbuf, 3); - s->accum &= 0xffffff; - s->accum_nbits = n + 24; - } - - if ((s->accum_nbits < 24 || s->accum_nbits >= 32) || - ((s->accum >> s->accum_nbits) != 0)) { - return -1; // the incoming input is wrong (encoder should have zeroed the - // upper bits) - } - - return 0; // OK + const uint8_t **pbuf, + const uint8_t *buf_start) +{ + if (n) { + if (*pbuf < buf_start + 4) + return -1; // out of range + *pbuf -= 4; + memcpy(&(s->accum), *pbuf, 4); + s->accum_nbits = n + 32; + } else { + if (*pbuf < buf_start + 3) + return -1; // out of range + *pbuf -= 3; + memcpy(&(s->accum), *pbuf, 3); + s->accum &= 0xffffff; + s->accum_nbits = n + 24; + } + + if ((s->accum_nbits < 24 || s->accum_nbits >= 32) || + ((s->accum >> s->accum_nbits) != 0)) { + return -1; // the incoming input is wrong (encoder should have zeroed the + // upper bits) + } + + return 0; // OK } /*! @abstract Read in new bytes from buffer to ensure that we have a full @@ -332,72 +342,79 @@ FSE_INLINE int fse_in_checked_init32(fse_in_stream32 *s, fse_bit_count n, * @return 0 if OK. * @return -1 on failure. */ FSE_INLINE int fse_in_checked_flush64(fse_in_stream64 *s, const uint8_t **pbuf, - const uint8_t *buf_start) { - // Get number of bits to add to bring us into the desired range. - fse_bit_count nbits = (63 - s->accum_nbits) & -8; - // Convert bits to bytes and decrement buffer address, then load new data. - const uint8_t *buf = (*pbuf) - (nbits >> 3); - uint64_t incoming; - if (buf < buf_start) { - return -1; // out of range - } - *pbuf = buf; - memcpy(&incoming, buf, 8); - // Update the state object and verify its validity (in DEBUG). - s->accum = (s->accum << nbits) | fse_mask_lsb64(incoming, nbits); - s->accum_nbits += nbits; - DEBUG_CHECK_INPUT_STREAM_PARAMETERS - return 0; // OK + const uint8_t *buf_start) +{ + // Get number of bits to add to bring us into the desired range. + fse_bit_count nbits = (63 - s->accum_nbits) & -8; + // Convert bits to bytes and decrement buffer address, then load new data. + const uint8_t *buf = (*pbuf) - (nbits >> 3); + uint64_t incoming; + if (buf < buf_start) { + return -1; // out of range + } + *pbuf = buf; + memcpy(&incoming, buf, 8); + // Update the state object and verify its validity (in DEBUG). + s->accum = (s->accum << nbits) | fse_mask_lsb64(incoming, nbits); + s->accum_nbits += nbits; + DEBUG_CHECK_INPUT_STREAM_PARAMETERS + return 0; // OK } /*! @abstract Identical to previous function (but again, we're only filling * a 32-bit field with between 24 and 31 bits). */ FSE_INLINE int fse_in_checked_flush32(fse_in_stream32 *s, const uint8_t **pbuf, - const uint8_t *buf_start) { - // Get number of bits to add to bring us into the desired range. - fse_bit_count nbits = (31 - s->accum_nbits) & -8; - - if (nbits > 0) { - // Convert bits to bytes and decrement buffer address, then load new data. - const uint8_t *buf = (*pbuf) - (nbits >> 3); - uint32_t incoming; - if (buf < buf_start) { - return -1; // out of range - } - - *pbuf = buf; - - incoming = *((uint32_t *)buf); - - // Update the state object and verify its validity (in DEBUG). 
- s->accum = (s->accum << nbits) | fse_mask_lsb32(incoming, nbits); - s->accum_nbits += nbits; - } - DEBUG_CHECK_INPUT_STREAM_PARAMETERS - return 0; // OK + const uint8_t *buf_start) +{ + // Get number of bits to add to bring us into the desired range. + fse_bit_count nbits = (31 - s->accum_nbits) & -8; + + if (nbits > 0) { + // Convert bits to bytes and decrement buffer address, then load new data. + const uint8_t *buf = (*pbuf) - (nbits >> 3); + uint32_t incoming; + if (buf < buf_start) { + return -1; // out of range + } + + *pbuf = buf; + + incoming = *((uint32_t *)buf); + + // Update the state object and verify its validity (in DEBUG). + s->accum = (s->accum << nbits) | + fse_mask_lsb32(incoming, nbits); + s->accum_nbits += nbits; + } + DEBUG_CHECK_INPUT_STREAM_PARAMETERS + return 0; // OK } /*! @abstract Pull n bits out of the fse stream object. */ -FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) { - uint64_t result; - s->accum_nbits -= n; - result = s->accum >> s->accum_nbits; - s->accum = fse_mask_lsb64(s->accum, s->accum_nbits); - return result; +FSE_INLINE uint64_t fse_in_pull64(fse_in_stream64 *s, fse_bit_count n) +{ + uint64_t result; + s->accum_nbits -= n; + result = s->accum >> s->accum_nbits; + s->accum = fse_mask_lsb64(s->accum, s->accum_nbits); + return result; } /*! @abstract Pull n bits out of the fse stream object. */ -FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) { - uint32_t result; - s->accum_nbits -= n; - result = s->accum >> s->accum_nbits; - s->accum = fse_mask_lsb32(s->accum, s->accum_nbits); - return result; +FSE_INLINE uint32_t fse_in_pull32(fse_in_stream32 *s, fse_bit_count n) +{ + uint32_t result; + s->accum_nbits -= n; + result = s->accum >> s->accum_nbits; + s->accum = fse_mask_lsb32(s->accum, s->accum_nbits); + return result; } -// MARK: - Encode/Decode +/* MARK: - Encode/Decode + */ -// Map to 32/64-bit implementations and types for I/O +/* Map to 32/64-bit implementations and types for I/O + */ #if FSE_IOSTREAM_64 typedef uint64_t fse_bits; @@ -414,7 +431,7 @@ typedef fse_in_stream64 fse_in_stream; #define fse_in_flush fse_in_checked_flush64 #define fse_in_checked_flush fse_in_checked_flush64 #define fse_in_flush2(_unused, _parameters, _unused2) 0 /* nothing */ -#define fse_in_checked_flush2(_unused, _parameters) /* nothing */ +#define fse_in_checked_flush2(_unused, _parameters) /* nothing */ #define fse_in_pull fse_in_pull64 #else @@ -440,26 +457,26 @@ typedef fse_in_stream32 fse_in_stream; /*! @abstract Entry for one symbol in the encoder table (64b). */ typedef struct { - int16_t s0; // First state requiring a K-bit shift - int16_t k; // States S >= S0 are shifted K bits. States S < S0 are - // shifted K-1 bits - int16_t delta0; // Relative increment used to compute next state if S >= S0 - int16_t delta1; // Relative increment used to compute next state if S < S0 + int16_t s0; // First state requiring a K-bit shift + int16_t k; // States S >= S0 are shifted K bits. States S < S0 are + // shifted K-1 bits + int16_t delta0; // Relative increment used to compute next state if S >= S0 + int16_t delta1; // Relative increment used to compute next state if S < S0 } fse_encoder_entry; /*! @abstract Entry for one state in the decoder table (32b). 
*/ -typedef struct { // DO NOT REORDER THE FIELDS - int8_t k; // Number of bits to read - uint8_t symbol; // Emitted symbol - int16_t delta; // Signed increment used to compute next state (+bias) +typedef struct { // DO NOT REORDER THE FIELDS + int8_t k; // Number of bits to read + uint8_t symbol; // Emitted symbol + int16_t delta; // Signed increment used to compute next state (+bias) } fse_decoder_entry; /*! @abstract Entry for one state in the value decoder table (64b). */ -typedef struct { // DO NOT REORDER THE FIELDS - uint8_t total_bits; // state bits + extra value bits = shift for next decode - uint8_t value_bits; // extra value bits - int16_t delta; // state base (delta) - int32_t vbase; // value base +typedef struct { // DO NOT REORDER THE FIELDS + uint8_t total_bits; // state bits + extra value bits = shift for next decode + uint8_t value_bits; // extra value bits + int16_t delta; // state base (delta) + int32_t vbase; // value base } fse_value_decoder_entry; /*! @abstract Encode SYMBOL using the encoder table, and update \c *pstate, @@ -467,26 +484,27 @@ typedef struct { // DO NOT REORDER THE FIELDS * @note The caller must ensure we have enough bits available in the output * stream accumulator. */ FSE_INLINE void fse_encode(fse_state *__restrict pstate, - const fse_encoder_entry *__restrict encoder_table, - fse_out_stream *__restrict out, uint8_t symbol) { - int s = *pstate; - fse_encoder_entry e = encoder_table[symbol]; - int s0 = e.s0; - int k = e.k; - int delta0 = e.delta0; - int delta1 = e.delta1; - - // Number of bits to write - int hi = s >= s0; - fse_bit_count nbits = hi ? k : (k - 1); - fse_state delta = hi ? delta0 : delta1; - - // Write lower NBITS of state - fse_bits b = fse_mask_lsb(s, nbits); - fse_out_push(out, nbits, b); - - // Update state with remaining bits and delta - *pstate = delta + (s >> nbits); + const fse_encoder_entry *__restrict encoder_table, + fse_out_stream *__restrict out, uint8_t symbol) +{ + int s = *pstate; + fse_encoder_entry e = encoder_table[symbol]; + int s0 = e.s0; + int k = e.k; + int delta0 = e.delta0; + int delta1 = e.delta1; + + // Number of bits to write + int hi = s >= s0; + fse_bit_count nbits = hi ? k : (k - 1); + fse_state delta = hi ? delta0 : delta1; + + // Write lower NBITS of state + fse_bits b = fse_mask_lsb(s, nbits); + fse_out_push(out, nbits, b); + + // Update state with remaining bits and delta + *pstate = delta + (s >> nbits); } /*! @abstract Decode and return symbol using the decoder table, and update @@ -494,15 +512,16 @@ FSE_INLINE void fse_encode(fse_state *__restrict pstate, * @note The caller must ensure we have enough bits available in the input * stream accumulator. */ FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate, - const int32_t *__restrict decoder_table, - fse_in_stream *__restrict in) { - int32_t e = decoder_table[*pstate]; + const int32_t *__restrict decoder_table, + fse_in_stream *__restrict in) +{ + int32_t e = decoder_table[*pstate]; - // Update state from K bits of input + DELTA - *pstate = (fse_state)(e >> 16) + (fse_state)fse_in_pull(in, e & 0xff); + // Update state from K bits of input + DELTA + *pstate = (fse_state)(e >> 16) + (fse_state)fse_in_pull(in, e & 0xff); - // Return the symbol for this state - return fse_extract_bits(e, 8, 8); // symbol + // Return the symbol for this state + return fse_extract_bits(e, 8, 8); // symbol } /*! 
@abstract Decode and return value using the decoder table, and update \c @@ -512,34 +531,39 @@ FSE_INLINE uint8_t fse_decode(fse_state *__restrict pstate, * stream accumulator. */ FSE_INLINE int32_t fse_value_decode(fse_state *__restrict pstate, - const fse_value_decoder_entry *value_decoder_table, - fse_in_stream *__restrict in) { - fse_value_decoder_entry entry = value_decoder_table[*pstate]; - uint32_t state_and_value_bits = (uint32_t)fse_in_pull(in, entry.total_bits); - *pstate = - (fse_state)(entry.delta + (state_and_value_bits >> entry.value_bits)); - return (int32_t)(entry.vbase + - fse_mask_lsb(state_and_value_bits, entry.value_bits)); + const fse_value_decoder_entry *value_decoder_table, + fse_in_stream *__restrict in) +{ + fse_value_decoder_entry entry = value_decoder_table[*pstate]; + uint32_t state_and_value_bits = + (uint32_t)fse_in_pull(in, entry.total_bits); + *pstate = (fse_state)(entry.delta + + (state_and_value_bits >> entry.value_bits)); + return (int32_t)(entry.vbase + + fse_mask_lsb(state_and_value_bits, entry.value_bits)); } -// MARK: - Tables +/* MARK: - Tables + */ -// IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder -// tables shall be initialized with the same parameters, including the -// FREQ[NSYMBOL] array. -// +/* IMPORTANT: To properly decode an FSE encoded stream, both encoder/decoder + * tables shall be initialized with the same parameters, including the + * FREQ[NSYMBOL] array. + * + */ /*! @abstract Sanity check on frequency table, verify sum of \c freq * is <= \c number_of_states. */ FSE_INLINE int fse_check_freq(const uint16_t *freq_table, - const size_t table_size, - const size_t number_of_states) { - size_t sum_of_freq = 0; - int i; - for (i = 0; i < table_size; i++) { - sum_of_freq += freq_table[i]; - } - return (sum_of_freq > number_of_states) ? -1 : 0; + const size_t table_size, + const size_t number_of_states) +{ + size_t sum_of_freq = 0; + int i; + for (i = 0; i < table_size; i++) { + sum_of_freq += freq_table[i]; + } + return (sum_of_freq > number_of_states) ? -1 : 0; } /*! @abstract Initialize encoder table \c t[nsymbols]. @@ -556,8 +580,8 @@ FSE_INLINE int fse_check_freq(const uint16_t *freq_table, * present in the data. */ void fse_init_encoder_table(int nstates, int nsymbols, - const uint16_t *__restrict freq, - fse_encoder_entry *__restrict t); + const uint16_t *__restrict freq, + fse_encoder_entry *__restrict t); /*! @abstract Initialize decoder table \c t[nstates]. * @@ -576,8 +600,8 @@ void fse_init_encoder_table(int nstates, int nsymbols, * @return -1 on failure. */ int fse_init_decoder_table(int nstates, int nsymbols, - const uint16_t *__restrict freq, - int32_t *__restrict t); + const uint16_t *__restrict freq, + int32_t *__restrict t); /*! @abstract Initialize value decoder table \c t[nstates]. * @@ -595,12 +619,14 @@ int fse_init_decoder_table(int nstates, int nsymbols, * present in the data. */ void fse_init_value_decoder_table(int nstates, int nsymbols, - const uint16_t *__restrict freq, - const uint8_t *__restrict symbol_vbits, - const int32_t *__restrict symbol_vbase, - fse_value_decoder_entry *__restrict t); + const uint16_t *__restrict freq, + const uint8_t *__restrict symbol_vbits, + const int32_t *__restrict symbol_vbase, + fse_value_decoder_entry *__restrict t); /*! @abstract Normalize a table \c t[nsymbols] of occurrences to * \c freq[nsymbols]. 
*/ void fse_normalize_freq(int nstates, int nsymbols, const uint32_t *__restrict t, - uint16_t *__restrict freq); + uint16_t *__restrict freq); + +#endif /* LZFSE_FSE_H */ diff --git a/lzfse/lzfse_internal.h b/lzfse/lzfse_internal.h index 24d4d93..afca0b3 100644 --- a/lzfse/lzfse_internal.h +++ b/lzfse/lzfse_internal.h @@ -1,89 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ #ifndef LZFSE_INTERNAL_H #define LZFSE_INTERNAL_H -// Unlike the tunable parameters defined in lzfse_tunables.h, you probably -// should not modify the values defined in this header. Doing so will either -// break the compressor, or result in a compressed data format that is -// incompatible. +/* Unlike the tunable parameters defined in lzfse_tunables.h, you probably + * should not modify the values defined in this header. Doing so will either + * break the compressor, or result in a compressed data format that is + * incompatible. 
+ */ + +#define LZFSE_INLINE static inline __attribute__((__always_inline__)) #include "lzfse_fse.h" #include "lzfse_tunables.h" #include #include -#if defined(_MSC_VER) && !defined(__clang__) -# define LZFSE_INLINE __forceinline -# define __builtin_expect(X, Y) (X) -# define __attribute__(X) -# pragma warning(disable : 4068) // warning C4068: unknown pragma -#else -# define LZFSE_INLINE static inline __attribute__((__always_inline__)) -#endif - -// Implement GCC bit scan builtins for MSVC -#if defined(_MSC_VER) && !defined(__clang__) -#include - -LZFSE_INLINE int __builtin_clz(unsigned int val) { - unsigned long r = 0; - if (_BitScanReverse(&r, val)) { - return 31 - r; - } - return 32; -} - -LZFSE_INLINE int __builtin_ctzl(unsigned long val) { - unsigned long r = 0; - if (_BitScanForward(&r, val)) { - return r; - } - return 32; -} - -LZFSE_INLINE int __builtin_ctzll(uint64_t val) { - unsigned long r = 0; -#if defined(_M_AMD64) || defined(_M_ARM) - if (_BitScanForward64(&r, val)) { - return r; - } -#else - if (_BitScanForward(&r, (uint32_t)val)) { - return r; - } - if (_BitScanForward(&r, (uint32_t)(val >> 32))) { - return 32 + r; - } -#endif - return 64; -} -#endif - -// Throughout LZFSE we refer to "L", "M" and "D"; these will always appear as -// a triplet, and represent a "usual" LZ-style literal and match pair. "L" -// is the number of literal bytes, "M" is the number of match bytes, and "D" -// is the match "distance"; the distance in bytes between the current pointer -// and the start of the match. +/* Throughout LZFSE we refer to "L", "M" and "D"; these will always appear as + * a triplet, and represent a "usual" LZ-style literal and match pair. "L" + * is the number of literal bytes, "M" is the number of match bytes, and "D" + * is the match "distance"; the distance in bytes between the current pointer + * and the start of the match. + */ #define LZFSE_ENCODE_HASH_VALUES (1 << LZFSE_ENCODE_HASH_BITS) #define LZFSE_ENCODE_L_SYMBOLS 20 #define LZFSE_ENCODE_M_SYMBOLS 20 @@ -97,19 +39,21 @@ LZFSE_INLINE int __builtin_ctzll(uint64_t val) { #define LZFSE_LITERALS_PER_BLOCK (4 * LZFSE_MATCHES_PER_BLOCK) #define LZFSE_DECODE_LITERALS_PER_BLOCK (4 * LZFSE_DECODE_MATCHES_PER_BLOCK) -// LZFSE internal status. These values are used by internal LZFSE routines -// as return codes. There should not be any good reason to change their -// values; it is plausible that additional codes might be added in the -// future. +/* LZFSE internal status. These values are used by internal LZFSE routines + * as return codes. There should not be any good reason to change their + * values; it is plausible that additional codes might be added in the + * future. + */ #define LZFSE_STATUS_OK 0 #define LZFSE_STATUS_SRC_EMPTY -1 #define LZFSE_STATUS_DST_FULL -2 #define LZFSE_STATUS_ERROR -3 -// Type representing an offset between elements in a buffer. On 64-bit -// systems, this is stored in a 64-bit container to avoid extra sign- -// extension operations in addressing arithmetic, but the value is always -// representable as a 32-bit signed value in LZFSE's usage. +/* Type representing an offset between elements in a buffer. On 64-bit + * systems, this is stored in a 64-bit container to avoid extra sign- + * extension operations in addressing arithmetic, but the value is always + * representable as a 32-bit signed value in LZFSE's usage. 
+ */ #if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) typedef int64_t lzfse_offset; #else @@ -126,8 +70,8 @@ typedef uint64_t uintmax_t; * doing any pointer chasing and without pulling in any additional cachelines. * This provides a large performance win in practice. */ typedef struct { - int32_t pos[LZFSE_ENCODE_HASH_WIDTH]; - uint32_t value[LZFSE_ENCODE_HASH_WIDTH]; + int32_t pos[LZFSE_ENCODE_HASH_WIDTH]; + uint32_t value[LZFSE_ENCODE_HASH_WIDTH]; } lzfse_history_set; /*! @abstract An lzfse match is a sequence of bytes in the source buffer that @@ -144,190 +88,200 @@ typedef struct { * @endcode */ typedef struct { - // Offset of the first byte in the match. - lzfse_offset pos; - // First byte of the source -- the earlier location in the buffer with the - // same contents. - lzfse_offset ref; - // Length of the match. - uint32_t length; + // Offset of the first byte in the match. + lzfse_offset pos; + // First byte of the source -- the earlier location in the buffer with the + // same contents. + lzfse_offset ref; + // Length of the match. + uint32_t length; } lzfse_match; -// MARK: - Encoder and Decoder state objects +/* MARK: - Encoder and Decoder state objects + */ /*! @abstract Encoder state object. */ typedef struct { - // Pointer to first byte of the source buffer. - const uint8_t *src; - // Length of the source buffer in bytes. Note that this is not a size_t, - // but rather lzfse_offset, which is a signed type. The largest - // representable buffer is 2GB, but arbitrarily large buffers may be - // handled by repeatedly calling the encoder function and "translating" - // the state between calls. When doing this, it is beneficial to use - // blocks smaller than 2GB in order to maintain residency in the last-level - // cache. Consult the implementation of lzfse_encode_buffer for details. - lzfse_offset src_end; - // Offset of the first byte of the next literal to encode in the source - // buffer. - lzfse_offset src_literal; - // Offset of the byte currently being checked for a match. - lzfse_offset src_encode_i; - // The last byte offset to consider for a match. In some uses it makes - // sense to use a smaller offset than src_end. - lzfse_offset src_encode_end; - // Pointer to the next byte to be written in the destination buffer. - uint8_t *dst; - // Pointer to the first byte of the destination buffer. - uint8_t *dst_begin; - // Pointer to one byte past the end of the destination buffer. - uint8_t *dst_end; - // Pending match; will be emitted unless a better match is found. - lzfse_match pending; - // The number of matches written so far. Note that there is no problem in - // using a 32-bit field for this quantity, because the state already limits - // us to at most 2GB of data; there cannot possibly be more matches than - // there are bytes in the input. - uint32_t n_matches; - // The number of literals written so far. - uint32_t n_literals; - // Lengths of found literals. - uint32_t l_values[LZFSE_MATCHES_PER_BLOCK]; - // Lengths of found matches. - uint32_t m_values[LZFSE_MATCHES_PER_BLOCK]; - // Distances of found matches. - uint32_t d_values[LZFSE_MATCHES_PER_BLOCK]; - // Concatenated literal bytes. - uint8_t literals[LZFSE_LITERALS_PER_BLOCK]; - // History table used to search for matches. Each entry of the table - // corresponds to a group of four byte sequences in the input stream - // that hash to the same value. - lzfse_history_set history_table[LZFSE_ENCODE_HASH_VALUES]; + // Pointer to first byte of the source buffer. 
+ const uint8_t *src; + // Length of the source buffer in bytes. Note that this is not a size_t, + // but rather lzfse_offset, which is a signed type. The largest + // representable buffer is 2GB, but arbitrarily large buffers may be + // handled by repeatedly calling the encoder function and "translating" + // the state between calls. When doing this, it is beneficial to use + // blocks smaller than 2GB in order to maintain residency in the last-level + // cache. Consult the implementation of lzfse_encode_buffer for details. + lzfse_offset src_end; + // Offset of the first byte of the next literal to encode in the source + // buffer. + lzfse_offset src_literal; + // Offset of the byte currently being checked for a match. + lzfse_offset src_encode_i; + // The last byte offset to consider for a match. In some uses it makes + // sense to use a smaller offset than src_end. + lzfse_offset src_encode_end; + // Pointer to the next byte to be written in the destination buffer. + uint8_t *dst; + // Pointer to the first byte of the destination buffer. + uint8_t *dst_begin; + // Pointer to one byte past the end of the destination buffer. + uint8_t *dst_end; + // Pending match; will be emitted unless a better match is found. + lzfse_match pending; + // The number of matches written so far. Note that there is no problem in + // using a 32-bit field for this quantity, because the state already limits + // us to at most 2GB of data; there cannot possibly be more matches than + // there are bytes in the input. + uint32_t n_matches; + // The number of literals written so far. + uint32_t n_literals; + // Lengths of found literals. + uint32_t l_values[LZFSE_MATCHES_PER_BLOCK]; + // Lengths of found matches. + uint32_t m_values[LZFSE_MATCHES_PER_BLOCK]; + // Distances of found matches. + uint32_t d_values[LZFSE_MATCHES_PER_BLOCK]; + // Concatenated literal bytes. + uint8_t literals[LZFSE_LITERALS_PER_BLOCK]; + // History table used to search for matches. Each entry of the table + // corresponds to a group of four byte sequences in the input stream + // that hash to the same value. + lzfse_history_set history_table[LZFSE_ENCODE_HASH_VALUES]; } lzfse_encoder_state; /*! @abstract Decoder state object for lzfse compressed blocks. */ typedef struct { - // Number of matches remaining in the block. - uint32_t n_matches; - // Number of bytes used to encode L, M, D triplets for the block. - uint32_t n_lmd_payload_bytes; - // Pointer to the next literal to emit. - const uint8_t *current_literal; - // L, M, D triplet for the match currently being emitted. This is used only - // if we need to restart after reaching the end of the destination buffer in - // the middle of a literal or match. - int32_t l_value, m_value, d_value; - // FSE stream object. - fse_in_stream lmd_in_stream; - // Offset of L,M,D encoding in the input buffer. Because we read through an - // FSE stream *backwards* while decoding, this is decremented as we move - // through a block. - uint32_t lmd_in_buf; - // The current state of the L, M, and D FSE decoders. - uint16_t l_state, m_state, d_state; - // Internal FSE decoder tables for the current block. These have - // alignment forced to 8 bytes to guarantee that a single state's - // entry cannot span two cachelines. 
- fse_value_decoder_entry l_decoder[LZFSE_ENCODE_L_STATES] __attribute__((__aligned__(8))); - fse_value_decoder_entry m_decoder[LZFSE_ENCODE_M_STATES] __attribute__((__aligned__(8))); - fse_value_decoder_entry d_decoder[LZFSE_ENCODE_D_STATES] __attribute__((__aligned__(8))); - int32_t literal_decoder[LZFSE_ENCODE_LITERAL_STATES]; - // The literal stream for the block, plus padding to allow for faster copy - // operations. - uint8_t literals[LZFSE_LITERALS_PER_BLOCK + 64]; + // Number of matches remaining in the block. + uint32_t n_matches; + // Number of bytes used to encode L, M, D triplets for the block. + uint32_t n_lmd_payload_bytes; + // Pointer to the next literal to emit. + const uint8_t *current_literal; + // L, M, D triplet for the match currently being emitted. This is used only + // if we need to restart after reaching the end of the destination buffer in + // the middle of a literal or match. + int32_t l_value, m_value, d_value; + // FSE stream object. + fse_in_stream lmd_in_stream; + // Offset of L,M,D encoding in the input buffer. Because we read through an + // FSE stream *backwards* while decoding, this is decremented as we move + // through a block. + uint32_t lmd_in_buf; + // The current state of the L, M, and D FSE decoders. + uint16_t l_state, m_state, d_state; + // Internal FSE decoder tables for the current block. These have + // alignment forced to 8 bytes to guarantee that a single state's + // entry cannot span two cachelines. + fse_value_decoder_entry l_decoder[LZFSE_ENCODE_L_STATES] + __attribute__((__aligned__(8))); + fse_value_decoder_entry m_decoder[LZFSE_ENCODE_M_STATES] + __attribute__((__aligned__(8))); + fse_value_decoder_entry d_decoder[LZFSE_ENCODE_D_STATES] + __attribute__((__aligned__(8))); + int32_t literal_decoder[LZFSE_ENCODE_LITERAL_STATES]; + // The literal stream for the block, plus padding to allow for faster copy + // operations. + uint8_t literals[LZFSE_LITERALS_PER_BLOCK + 64]; } lzfse_compressed_block_decoder_state; -// Decoder state object for uncompressed blocks. -typedef struct { uint32_t n_raw_bytes; } uncompressed_block_decoder_state; +/* Decoder state object for uncompressed blocks. + */ +typedef struct { + uint32_t n_raw_bytes; +} uncompressed_block_decoder_state; /*! @abstract Decoder state object for lzvn-compressed blocks. */ typedef struct { - uint32_t n_raw_bytes; - uint32_t n_payload_bytes; - uint32_t d_prev; + uint32_t n_raw_bytes; + uint32_t n_payload_bytes; + uint32_t d_prev; } lzvn_compressed_block_decoder_state; /*! @abstract Decoder state object. */ typedef struct { - // Pointer to next byte to read from source buffer (this is advanced as we - // decode; src_begin describe the buffer and do not change). - const uint8_t *src; - // Pointer to first byte of source buffer. - const uint8_t *src_begin; - // Pointer to one byte past the end of the source buffer. - const uint8_t *src_end; - // Pointer to the next byte to write to destination buffer (this is advanced - // as we decode; dst_begin and dst_end describe the buffer and do not change). - uint8_t *dst; - // Pointer to first byte of destination buffer. - uint8_t *dst_begin; - // Pointer to one byte past the end of the destination buffer. - uint8_t *dst_end; - // 1 if we have reached the end of the stream, 0 otherwise. - int end_of_stream; - // magic number of the current block if we are within a block, - // LZFSE_NO_BLOCK_MAGIC otherwise. 
- uint32_t block_magic; - lzfse_compressed_block_decoder_state compressed_lzfse_block_state; - lzvn_compressed_block_decoder_state compressed_lzvn_block_state; - uncompressed_block_decoder_state uncompressed_block_state; + // Pointer to next byte to read from source buffer (this is advanced as we + // decode; src_begin and src_end describe the buffer and do not change). + const uint8_t *src; + // Pointer to first byte of source buffer. + const uint8_t *src_begin; + // Pointer to one byte past the end of the source buffer. + const uint8_t *src_end; + // Pointer to the next byte to write to destination buffer (this is advanced + // as we decode; dst_begin and dst_end describe the buffer and do not change). + uint8_t *dst; + // Pointer to first byte of destination buffer. + uint8_t *dst_begin; + // Pointer to one byte past the end of the destination buffer. + uint8_t *dst_end; + // 1 if we have reached the end of the stream, 0 otherwise. + int end_of_stream; + // magic number of the current block if we are within a block, + // LZFSE_NO_BLOCK_MAGIC otherwise. + uint32_t block_magic; + lzfse_compressed_block_decoder_state compressed_lzfse_block_state; + lzvn_compressed_block_decoder_state compressed_lzvn_block_state; + uncompressed_block_decoder_state uncompressed_block_state; } lzfse_decoder_state; -// MARK: - Block header objects +/* MARK: - Block header objects + */ -#define LZFSE_NO_BLOCK_MAGIC 0x00000000 // 0 (invalid) -#define LZFSE_ENDOFSTREAM_BLOCK_MAGIC 0x24787662 // bvx$ (end of stream) -#define LZFSE_UNCOMPRESSED_BLOCK_MAGIC 0x2d787662 // bvx- (raw data) -#define LZFSE_COMPRESSEDV1_BLOCK_MAGIC 0x31787662 // bvx1 (lzfse compressed, uncompressed tables) -#define LZFSE_COMPRESSEDV2_BLOCK_MAGIC 0x32787662 // bvx2 (lzfse compressed, compressed tables) +#define LZFSE_NO_BLOCK_MAGIC 0x00000000 // 0 (invalid) +#define LZFSE_ENDOFSTREAM_BLOCK_MAGIC 0x24787662 // bvx$ (end of stream) +#define LZFSE_UNCOMPRESSED_BLOCK_MAGIC 0x2d787662 // bvx- (raw data) +#define LZFSE_COMPRESSEDV1_BLOCK_MAGIC \ + 0x31787662 // bvx1 (lzfse compressed, uncompressed tables) +#define LZFSE_COMPRESSEDV2_BLOCK_MAGIC \ + 0x32787662 // bvx2 (lzfse compressed, compressed tables) #define LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC 0x6e787662 // bvxn (lzvn compressed) /*! @abstract Uncompressed block header in encoder stream. */ typedef struct { - // Magic number, always LZFSE_UNCOMPRESSED_BLOCK_MAGIC. - uint32_t magic; - // Number of raw bytes in block. - uint32_t n_raw_bytes; + // Magic number, always LZFSE_UNCOMPRESSED_BLOCK_MAGIC. + uint32_t magic; + // Number of raw bytes in block. + uint32_t n_raw_bytes; } uncompressed_block_header; /*! @abstract Compressed block header with uncompressed tables. */ typedef struct { - // Magic number, always LZFSE_COMPRESSEDV1_BLOCK_MAGIC. - uint32_t magic; - // Number of decoded (output) bytes in block. - uint32_t n_raw_bytes; - // Number of encoded (source) bytes in block. - uint32_t n_payload_bytes; - // Number of literal bytes output by block (*not* the number of literals). - uint32_t n_literals; - // Number of matches in block (which is also the number of literals). - uint32_t n_matches; - // Number of bytes used to encode literals. - uint32_t n_literal_payload_bytes; - // Number of bytes used to encode matches. - uint32_t n_lmd_payload_bytes; - - // Final encoder states for the block, which will be the initial states for - // the decoder: - // Final accum_nbits for literals stream. - int32_t literal_bits; - // There are four interleaved streams of literals, so there are four final - // states.
- uint16_t literal_state[4]; - // accum_nbits for the l, m, d stream. - int32_t lmd_bits; - // Final L (literal length) state. - uint16_t l_state; - // Final M (match length) state. - uint16_t m_state; - // Final D (match distance) state. - uint16_t d_state; - - // Normalized frequency tables for each stream. Sum of values in each - // array is the number of states. - uint16_t l_freq[LZFSE_ENCODE_L_SYMBOLS]; - uint16_t m_freq[LZFSE_ENCODE_M_SYMBOLS]; - uint16_t d_freq[LZFSE_ENCODE_D_SYMBOLS]; - uint16_t literal_freq[LZFSE_ENCODE_LITERAL_SYMBOLS]; + // Magic number, always LZFSE_COMPRESSEDV1_BLOCK_MAGIC. + uint32_t magic; + // Number of decoded (output) bytes in block. + uint32_t n_raw_bytes; + // Number of encoded (source) bytes in block. + uint32_t n_payload_bytes; + // Number of literal bytes output by block (*not* the number of literals). + uint32_t n_literals; + // Number of matches in block (which is also the number of literals). + uint32_t n_matches; + // Number of bytes used to encode literals. + uint32_t n_literal_payload_bytes; + // Number of bytes used to encode matches. + uint32_t n_lmd_payload_bytes; + + // Final encoder states for the block, which will be the initial states for + // the decoder: + // Final accum_nbits for literals stream. + int32_t literal_bits; + // There are four interleaved streams of literals, so there are four final + // states. + uint16_t literal_state[4]; + // accum_nbits for the l, m, d stream. + int32_t lmd_bits; + // Final L (literal length) state. + uint16_t l_state; + // Final M (match length) state. + uint16_t m_state; + // Final D (match distance) state. + uint16_t d_state; + + // Normalized frequency tables for each stream. Sum of values in each + // array is the number of states. + uint16_t l_freq[LZFSE_ENCODE_L_SYMBOLS]; + uint16_t m_freq[LZFSE_ENCODE_M_SYMBOLS]; + uint16_t d_freq[LZFSE_ENCODE_D_SYMBOLS]; + uint16_t literal_freq[LZFSE_ENCODE_LITERAL_SYMBOLS]; } lzfse_compressed_block_header_v1; /*! @abstract Compressed block header with compressed tables. Note that because @@ -335,56 +289,57 @@ typedef struct { * we only store the used bytes of freq[]. This means that some extra care must * be taken when reading one of these headers from the stream. */ typedef struct { - // Magic number, always LZFSE_COMPRESSEDV2_BLOCK_MAGIC. - uint32_t magic; - // Number of decoded (output) bytes in block. - uint32_t n_raw_bytes; - // The fields n_payload_bytes ... d_state from the - // lzfse_compressed_block_header_v1 object are packed into three 64-bit - // fields in the compressed header, as follows: - // - // offset bits value - // 0 20 n_literals - // 20 20 n_literal_payload_bytes - // 40 20 n_matches - // 60 3 literal_bits - // 63 1 --- unused --- - // - // 0 10 literal_state[0] - // 10 10 literal_state[1] - // 20 10 literal_state[2] - // 30 10 literal_state[3] - // 40 20 n_lmd_payload_bytes - // 60 3 lmd_bits - // 63 1 --- unused --- - // - // 0 32 header_size (total header size in bytes; this does not - // correspond to a field in the uncompressed header version, - // but is required; we wouldn't know the size of the - // compresssed header otherwise. - // 32 10 l_state - // 42 10 m_state - // 52 10 d_state - // 62 2 --- unused --- - uint64_t packed_fields[3]; - // Variable size freq tables, using a Huffman-style fixed encoding. - // Size allocated here is an upper bound (all values stored on 16 bits). 
- uint8_t freq[2 * (LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + - LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS)]; -} __attribute__((__packed__, __aligned__(1))) -lzfse_compressed_block_header_v2; + // Magic number, always LZFSE_COMPRESSEDV2_BLOCK_MAGIC. + uint32_t magic; + // Number of decoded (output) bytes in block. + uint32_t n_raw_bytes; + // The fields n_payload_bytes ... d_state from the + // lzfse_compressed_block_header_v1 object are packed into three 64-bit + // fields in the compressed header, as follows: + // + // offset bits value + // 0 20 n_literals + // 20 20 n_literal_payload_bytes + // 40 20 n_matches + // 60 3 literal_bits + // 63 1 --- unused --- + // + // 0 10 literal_state[0] + // 10 10 literal_state[1] + // 20 10 literal_state[2] + // 30 10 literal_state[3] + // 40 20 n_lmd_payload_bytes + // 60 3 lmd_bits + // 63 1 --- unused --- + // + // 0 32 header_size (total header size in bytes; this does not + // correspond to a field in the uncompressed header version, + // but is required; we wouldn't know the size of the + // compressed header otherwise). + // 32 10 l_state + // 42 10 m_state + // 52 10 d_state + // 62 2 --- unused --- + uint64_t packed_fields[3]; + // Variable size freq tables, using a Huffman-style fixed encoding. + // Size allocated here is an upper bound (all values stored on 16 bits). + uint8_t freq[2 * + (LZFSE_ENCODE_L_SYMBOLS + LZFSE_ENCODE_M_SYMBOLS + + LZFSE_ENCODE_D_SYMBOLS + LZFSE_ENCODE_LITERAL_SYMBOLS)]; +} __attribute__((__packed__, __aligned__(1))) lzfse_compressed_block_header_v2; /*! @abstract LZVN compressed block header. */ typedef struct { - // Magic number, always LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC. - uint32_t magic; - // Number of decoded (output) bytes. - uint32_t n_raw_bytes; - // Number of encoded (source) bytes. - uint32_t n_payload_bytes; + // Magic number, always LZFSE_COMPRESSEDLZVN_BLOCK_MAGIC. + uint32_t magic; + // Number of decoded (output) bytes. + uint32_t n_raw_bytes; + // Number of encoded (source) bytes. + uint32_t n_payload_bytes; } lzvn_compressed_block_header; -// MARK: - LZFSE encode/decode interfaces +/* MARK: - LZFSE encode/decode interfaces + */ int lzfse_encode_init(lzfse_encoder_state *s); int lzfse_encode_translate(lzfse_encoder_state *s, lzfse_offset delta); @@ -392,27 +347,31 @@ int lzfse_encode_base(lzfse_encoder_state *s); int lzfse_encode_finish(lzfse_encoder_state *s); int lzfse_decode(lzfse_decoder_state *s); -// MARK: - LZVN encode/decode interfaces +/* MARK: - LZVN encode/decode interfaces + */ -// Minimum source buffer size for compression. Smaller buffers will not be -// compressed; the lzvn encoder will simply return. +/* Minimum source buffer size for compression. Smaller buffers will not be + * compressed; the lzvn encoder will simply return. + */ #define LZVN_ENCODE_MIN_SRC_SIZE ((size_t)8) -// Maximum source buffer size for compression. Larger buffers will be -// compressed partially. +/* Maximum source buffer size for compression. Larger buffers will be + * compressed partially. + */ #define LZVN_ENCODE_MAX_SRC_SIZE ((size_t)0xffffffffU) -// Minimum destination buffer size for compression. No compression will take -// place if smaller. +/* Minimum destination buffer size for compression. No compression will take + * place if smaller.
+ */ #define LZVN_ENCODE_MIN_DST_SIZE ((size_t)8) size_t lzvn_decode_scratch_size(void); size_t lzvn_encode_scratch_size(void); size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size, - const void *__restrict src, size_t src_size, - void *__restrict work); + const void *__restrict src, size_t src_size, + void *__restrict work); size_t lzvn_decode_buffer(void *__restrict dst, size_t dst_size, - const void *__restrict src, size_t src_size); + const void *__restrict src, size_t src_size); /*! @abstract Signed offset in buffers, stored on either 32 or 64 bits. */ #if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__) @@ -421,38 +380,45 @@ typedef int64_t lzvn_offset; typedef int32_t lzvn_offset; #endif -// MARK: - LZFSE utility functions +/* MARK: - LZFSE utility functions + */ /*! @abstract Load bytes from memory location SRC. */ -LZFSE_INLINE uint16_t load2(const void *ptr) { - uint16_t data; - memcpy(&data, ptr, sizeof data); - return data; +LZFSE_INLINE uint16_t load2(const void *ptr) +{ + uint16_t data; + memcpy(&data, ptr, sizeof data); + return data; } -LZFSE_INLINE uint32_t load4(const void *ptr) { - uint32_t data; - memcpy(&data, ptr, sizeof data); - return data; +LZFSE_INLINE uint32_t load4(const void *ptr) +{ + uint32_t data; + memcpy(&data, ptr, sizeof data); + return data; } -LZFSE_INLINE uint64_t load8(const void *ptr) { - uint64_t data; - memcpy(&data, ptr, sizeof data); - return data; +LZFSE_INLINE uint64_t load8(const void *ptr) +{ + uint64_t data; + memcpy(&data, ptr, sizeof data); + return data; } /*! @abstract Store bytes to memory location DST. */ -LZFSE_INLINE void store2(void *ptr, uint16_t data) { - memcpy(ptr, &data, sizeof data); +LZFSE_INLINE void store2(void *ptr, uint16_t data) +{ + memcpy(ptr, &data, sizeof data); } -LZFSE_INLINE void store4(void *ptr, uint32_t data) { - memcpy(ptr, &data, sizeof data); +LZFSE_INLINE void store4(void *ptr, uint32_t data) +{ + memcpy(ptr, &data, sizeof data); } -LZFSE_INLINE void store8(void *ptr, uint64_t data) { - memcpy(ptr, &data, sizeof data); +LZFSE_INLINE void store8(void *ptr, uint64_t data) +{ + memcpy(ptr, &data, sizeof data); } /*! @abstract Load+store bytes from locations SRC to DST. Not intended for use @@ -460,25 +426,31 @@ LZFSE_INLINE void store8(void *ptr, uint64_t data) { * copies to behave like naive memcpy( ) implementations do, splatting the * leading sequence if the buffers overlap. This copy does not do that, so * should not be used with overlapping buffers. */ -LZFSE_INLINE void copy8(void *dst, const void *src) { store8(dst, load8(src)); } -LZFSE_INLINE void copy16(void *dst, const void *src) { - uint64_t m0 = load8(src); - uint64_t m1 = load8((const unsigned char *)src + 8); - store8(dst, m0); - store8((unsigned char *)dst + 8, m1); +LZFSE_INLINE void copy8(void *dst, const void *src) +{ + store8(dst, load8(src)); +} +LZFSE_INLINE void copy16(void *dst, const void *src) +{ + uint64_t m0 = load8(src); + uint64_t m1 = load8((const unsigned char *)src + 8); + store8(dst, m0); + store8((unsigned char *)dst + 8, m1); } -// =============================================================== -// Bitfield Operations +/* =============================================================== + * Bitfield Operations + */ /*! @abstract Extracts \p width bits from \p container, starting with \p lsb; if * we view \p container as a bit array, we extract \c container[lsb:lsb+width]. 
*/ LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb, - unsigned width) { - static const size_t container_width = sizeof container * 8; - if (width == container_width) - return container; - return (container >> lsb) & (((uintmax_t)1 << width) - 1); + unsigned width) +{ + static const size_t container_width = sizeof container * 8; + if (width == container_width) + return container; + return (container >> lsb) & (((uintmax_t)1 << width) - 1); } /*! @abstract Inserts \p width bits from \p data into \p container, starting with \p lsb. @@ -490,13 +462,14 @@ LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb, * @endcode */ LZFSE_INLINE uintmax_t insert(uintmax_t container, uintmax_t data, unsigned lsb, - unsigned width) { - static const size_t container_width = sizeof container * 8; - uintmax_t mask; - if (width == container_width) - return container; - mask = ((uintmax_t)1 << width) - 1; - return (container & ~(mask << lsb)) | (data & mask) << lsb; + unsigned width) +{ + static const size_t container_width = sizeof container * 8; + uintmax_t mask; + if (width == container_width) + return container; + mask = ((uintmax_t)1 << width) - 1; + return (container & ~(mask << lsb)) | (data & mask) << lsb; } /*! @abstract Perform sanity checks on the values of lzfse_compressed_block_header_v1. @@ -504,68 +477,81 @@ LZFSE_INLINE uintmax_t insert(uintmax_t container, uintmax_t data, unsigned lsb, * frequency tables sum to value less than total number of states. * @return 0 if all tests passed. * @return negative error code with 1 bit set for each failed test. */ -LZFSE_INLINE int lzfse_check_block_header_v1( - const lzfse_compressed_block_header_v1 *header) { - int tests_results = 0; - uint16_t literal_state[4]; - int res; - tests_results = - tests_results | - ((header->magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC) ? 0 : (1 << 0)); - tests_results = - tests_results | - ((header->n_literals <= LZFSE_LITERALS_PER_BLOCK) ? 0 : (1 << 1)); - tests_results = - tests_results | - ((header->n_matches <= LZFSE_MATCHES_PER_BLOCK) ? 0 : (1 << 2)); - - memcpy(literal_state, header->literal_state, sizeof(uint16_t) * 4); - - tests_results = - tests_results | - ((literal_state[0] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 3)); - tests_results = - tests_results | - ((literal_state[1] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 4)); - tests_results = - tests_results | - ((literal_state[2] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 5)); - tests_results = - tests_results | - ((literal_state[3] < LZFSE_ENCODE_LITERAL_STATES) ? 0 : (1 << 6)); - - tests_results = tests_results | - ((header->l_state < LZFSE_ENCODE_L_STATES) ? 0 : (1 << 7)); - tests_results = tests_results | - ((header->m_state < LZFSE_ENCODE_M_STATES) ? 0 : (1 << 8)); - tests_results = tests_results | - ((header->d_state < LZFSE_ENCODE_D_STATES) ? 0 : (1 << 9)); - - res = fse_check_freq(header->l_freq, LZFSE_ENCODE_L_SYMBOLS, - LZFSE_ENCODE_L_STATES); - tests_results = tests_results | ((res == 0) ? 0 : (1 << 10)); - res = fse_check_freq(header->m_freq, LZFSE_ENCODE_M_SYMBOLS, - LZFSE_ENCODE_M_STATES); - tests_results = tests_results | ((res == 0) ? 0 : (1 << 11)); - res = fse_check_freq(header->d_freq, LZFSE_ENCODE_D_SYMBOLS, - LZFSE_ENCODE_D_STATES); - tests_results = tests_results | ((res == 0) ? 0 : (1 << 12)); - res = fse_check_freq(header->literal_freq, LZFSE_ENCODE_LITERAL_SYMBOLS, - LZFSE_ENCODE_LITERAL_STATES); - tests_results = tests_results | ((res == 0) ? 
0 : (1 << 13)); - - if (tests_results) { - return tests_results | 0x80000000; // each 1 bit is a test that failed - // (except for the sign bit) - } - - return 0; // OK +LZFSE_INLINE int +lzfse_check_block_header_v1(const lzfse_compressed_block_header_v1 *header) +{ + int tests_results = 0; + uint16_t literal_state[4]; + int res; + tests_results = tests_results | + ((header->magic == LZFSE_COMPRESSEDV1_BLOCK_MAGIC) ? + 0 : + (1 << 0)); + tests_results = tests_results | + ((header->n_literals <= LZFSE_LITERALS_PER_BLOCK) ? + 0 : + (1 << 1)); + tests_results = + tests_results | + ((header->n_matches <= LZFSE_MATCHES_PER_BLOCK) ? 0 : (1 << 2)); + + memcpy(literal_state, header->literal_state, sizeof(uint16_t) * 4); + + tests_results = tests_results | + ((literal_state[0] < LZFSE_ENCODE_LITERAL_STATES) ? + 0 : + (1 << 3)); + tests_results = tests_results | + ((literal_state[1] < LZFSE_ENCODE_LITERAL_STATES) ? + 0 : + (1 << 4)); + tests_results = tests_results | + ((literal_state[2] < LZFSE_ENCODE_LITERAL_STATES) ? + 0 : + (1 << 5)); + tests_results = tests_results | + ((literal_state[3] < LZFSE_ENCODE_LITERAL_STATES) ? + 0 : + (1 << 6)); + + tests_results = + tests_results | + ((header->l_state < LZFSE_ENCODE_L_STATES) ? 0 : (1 << 7)); + tests_results = + tests_results | + ((header->m_state < LZFSE_ENCODE_M_STATES) ? 0 : (1 << 8)); + tests_results = + tests_results | + ((header->d_state < LZFSE_ENCODE_D_STATES) ? 0 : (1 << 9)); + + res = fse_check_freq(header->l_freq, LZFSE_ENCODE_L_SYMBOLS, + LZFSE_ENCODE_L_STATES); + tests_results = tests_results | ((res == 0) ? 0 : (1 << 10)); + res = fse_check_freq(header->m_freq, LZFSE_ENCODE_M_SYMBOLS, + LZFSE_ENCODE_M_STATES); + tests_results = tests_results | ((res == 0) ? 0 : (1 << 11)); + res = fse_check_freq(header->d_freq, LZFSE_ENCODE_D_SYMBOLS, + LZFSE_ENCODE_D_STATES); + tests_results = tests_results | ((res == 0) ? 0 : (1 << 12)); + res = fse_check_freq(header->literal_freq, LZFSE_ENCODE_LITERAL_SYMBOLS, + LZFSE_ENCODE_LITERAL_STATES); + tests_results = tests_results | ((res == 0) ? 0 : (1 << 13)); + + if (tests_results) { + return tests_results | + 0x80000000; // each 1 bit is a test that failed + // (except for the sign bit) + } + + return 0; // OK } -// MARK: - L, M, D encoding constants for LZFSE +/* MARK: - L, M, D encoding constants for LZFSE + */ -// Largest encodable L (literal length), M (match length) and D (match -// distance) values. +/* Largest encodable L (literal length), M (match length) and D (match + * distance) values. + */ #define LZFSE_ENCODE_MAX_L_VALUE 315 #define LZFSE_ENCODE_MAX_M_VALUE 2359 #define LZFSE_ENCODE_MAX_D_VALUE 262139 @@ -582,31 +568,32 @@ LZFSE_INLINE int lzfse_check_block_header_v1( * Those tables have been split out to lzfse_encode_tables.h in order to keep * this file relatively small. 
*/ static const uint8_t l_extra_bits[LZFSE_ENCODE_L_SYMBOLS] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 5, 8 }; static const int32_t l_base_value[LZFSE_ENCODE_L_SYMBOLS] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 20, 28, 60 }; static const uint8_t m_extra_bits[LZFSE_ENCODE_M_SYMBOLS] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11 }; static const int32_t m_base_value[LZFSE_ENCODE_M_SYMBOLS] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312 + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 56, 312 }; static const uint8_t d_extra_bits[LZFSE_ENCODE_D_SYMBOLS] = { - 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, - 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15 + 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, + 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, + 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15 }; static const int32_t d_base_value[LZFSE_ENCODE_D_SYMBOLS] = { - 0, 1, 2, 3, 4, 6, 8, 10, 12, 16, - 20, 24, 28, 36, 44, 52, 60, 76, 92, 108, - 124, 156, 188, 220, 252, 316, 380, 444, 508, 636, - 764, 892, 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580, - 4092, 5116, 6140, 7164, 8188, 10236, 12284, 14332, 16380, 20476, - 24572, 28668, 32764, 40956, 49148, 57340, 65532, 81916, 98300, 114684, - 131068, 163836, 196604, 229372 + 0, 1, 2, 3, 4, 6, 8, 10, + 12, 16, 20, 24, 28, 36, 44, 52, + 60, 76, 92, 108, 124, 156, 188, 220, + 252, 316, 380, 444, 508, 636, 764, 892, + 1020, 1276, 1532, 1788, 2044, 2556, 3068, 3580, + 4092, 5116, 6140, 7164, 8188, 10236, 12284, 14332, + 16380, 20476, 24572, 28668, 32764, 40956, 49148, 57340, + 65532, 81916, 98300, 114684, 131068, 163836, 196604, 229372 }; #endif // LZFSE_INTERNAL_H diff --git a/lzfse/lzfse_main.c b/lzfse/lzfse_main.c index dd4df99..bdccc5a 100644 --- a/lzfse/lzfse_main.c +++ b/lzfse/lzfse_main.c @@ -1,42 +1,11 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// LZFSE command line tool - -#if !defined(_POSIX_C_SOURCE) || (_POSIX_C_SOURCE < 200112L) -# undef _POSIX_C_SOURCE -# define _POSIX_C_SOURCE 200112L -#endif - -#if defined(_MSC_VER) -# if !defined(_CRT_NONSTDC_NO_DEPRECATE) -# define _CRT_NONSTDC_NO_DEPRECATE -# endif -# if !defined(_CRT_SECURE_NO_WARNINGS) -# define _CRT_SECURE_NO_WARNINGS -# endif -# if !defined(__clang__) -# define inline __inline -# endif -#endif +/* LZFSE command line tool + */ #include "lzfse.h" #include @@ -46,291 +15,270 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSI #include #include -#if defined(_MSC_VER) -# include -# include -#else -# include -# include -#endif - -// Same as realloc(x,s), except x is freed when realloc fails -static inline void *lzfse_reallocf(void *x, size_t s) { - void *y = realloc(x, s); - if (y == 0) { - free(x); - return 0; - } - return y; +/* Same as realloc(x,s), except x is freed when realloc fails + */ +static inline void *lzfse_reallocf(void *x, size_t s) +{ + void *y = realloc(x, s); + if (y == 0) { + free(x); + return 0; + } + return y; } -static double get_time() { -#if defined(_MSC_VER) - LARGE_INTEGER count, freq; - if (QueryPerformanceFrequency(&freq) && QueryPerformanceCounter(&count)) { - return (double)count.QuadPart / (double)freq.QuadPart; - } - return 1.0e-3 * (double)GetTickCount(); -#else - struct timeval tv; - if (gettimeofday(&tv, 0) != 0) { - perror("gettimeofday"); - exit(1); - } - return (double)tv.tv_sec + 1.0e-6 * (double)tv.tv_usec; -#endif +static double get_time() +{ + struct timeval tv; + if (gettimeofday(&tv, 0) != 0) { + perror("gettimeofday"); + exit(1); + } + return (double)tv.tv_sec + 1.0e-6 * (double)tv.tv_usec; } -//-------------------- +/* -------------------- + */ enum { LZFSE_ENCODE = 0, LZFSE_DECODE }; -void usage(int argc, char **argv) { - fprintf( - stderr, - "Usage: %s -encode|-decode [-i input_file] [-o output_file] [-h] [-v]\n", - argv[0]); +void usage(int argc, char **argv) +{ + fprintf(stderr, + "Usage: %s -encode|-decode [-i input_file] [-o output_file] [-h] [-v]\n", + argv[0]); } -#define USAGE(argc, argv) \ - do { \ - usage(argc, argv); \ - exit(0); \ - } while (0) -#define USAGE_MSG(argc, argv, ...) 
\ - do { \ - usage(argc, argv); \ - fprintf(stderr, __VA_ARGS__); \ - exit(1); \ - } while (0) - -int main(int argc, char **argv) { - const char *in_file = 0; // stdin - const char *out_file = 0; // stdout - int op = -1; // invalid op - int verbosity = 0; // quiet - - // Parse options - for (int i = 1; i < argc;) { - // no args - const char *a = argv[i++]; - if (strcmp(a, "-h") == 0) - USAGE(argc, argv); - if (strcmp(a, "-v") == 0) { - verbosity++; - continue; - } - if (strcmp(a, "-encode") == 0) { - op = LZFSE_ENCODE; - continue; - } - if (strcmp(a, "-decode") == 0) { - op = LZFSE_DECODE; - continue; - } - - // one arg - const char **arg_var = 0; - if (strcmp(a, "-i") == 0 && in_file == 0) - arg_var = &in_file; - else if (strcmp(a, "-o") == 0 && out_file == 0) - arg_var = &out_file; - if (arg_var != 0) { - // Flag is recognized. Check if there is an argument. - if (i == argc) - USAGE_MSG(argc, argv, "Error: Missing arg after %s\n", a); - *arg_var = argv[i++]; - continue; - } - - USAGE_MSG(argc, argv, "Error: invalid flag %s\n", a); - } - if (op < 0) - USAGE_MSG(argc, argv, "Error: -encode|-decode required\n"); - - // Info - if (verbosity > 0) { - if (op == LZFSE_ENCODE) - fprintf(stderr, "LZFSE encode\n"); - if (op == LZFSE_DECODE) - fprintf(stderr, "LZFSE decode\n"); - fprintf(stderr, "Input: %s\n", in_file ? in_file : "stdin"); - fprintf(stderr, "Output: %s\n", out_file ? out_file : "stdout"); - } - - // Load input - size_t in_allocated = 0; // allocated in IN - size_t in_size = 0; // used in IN - uint8_t *in = 0; // input buffer - int in_fd = -1; // input file desc - - if (in_file != 0) { - // If we have a file name, open it, and allocate the exact input size - struct stat st; -#if defined(_WIN32) - in_fd = open(in_file, O_RDONLY | O_BINARY); -#else - in_fd = open(in_file, O_RDONLY); -#endif - if (in_fd < 0) { - perror(in_file); - exit(1); - } - if (fstat(in_fd, &st) != 0) { - perror(in_file); - exit(1); - } - if (st.st_size > SIZE_MAX) { - fprintf(stderr, "File is too large\n"); - exit(1); - } - in_allocated = (size_t)st.st_size; - } else { - // Otherwise, read from stdin, and allocate to 1 MB, grow as needed - in_allocated = 1 << 20; - in_fd = 0; -#if defined(_WIN32) - if (setmode(in_fd, O_BINARY) == -1) { - perror("setmode"); - exit(1); - } -#endif - } - in = (uint8_t *)malloc(in_allocated); - if (in == 0) { - perror("malloc"); - exit(1); - } - - while (1) { - // re-alloc if needed - if (in_size == in_allocated) { - if (in_allocated < (100 << 20)) - in_allocated <<= 1; // double it - else - in_allocated += (100 << 20); // or add 100 MB if already large - in = lzfse_reallocf(in, in_allocated); - if (in == 0) { - perror("malloc"); - exit(1); - } - } - - ptrdiff_t r = read(in_fd, in + in_size, in_allocated - in_size); - if (r < 0) { - perror("read"); - exit(1); - } - if (r == 0) - break; // end of file - in_size += (size_t)r; - } - - if (in_file != 0) { - close(in_fd); - in_fd = -1; - } - - // Size info - if (verbosity > 0) { - fprintf(stderr, "Input size: %zu B\n", in_size); - } - - // Encode/decode - // Compute size for result buffer; we assume here that encode shrinks size, - // and that decode grows by no more than 4x. These are reasonable common- - // case guidelines, but are not formally guaranteed to be satisfied. - size_t out_allocated = (op == LZFSE_ENCODE) ? in_size : (4 * in_size); - size_t out_size = 0; - size_t aux_allocated = (op == LZFSE_ENCODE) ? lzfse_encode_scratch_size() - : lzfse_decode_scratch_size(); - void *aux = aux_allocated ? 
malloc(aux_allocated) : 0; - if (aux_allocated != 0 && aux == 0) { - perror("malloc"); - exit(1); - } - uint8_t *out = (uint8_t *)malloc(out_allocated); - if (out == 0) { - perror("malloc"); - exit(1); - } - - double c0 = get_time(); - while (1) { - if (op == LZFSE_ENCODE) - out_size = lzfse_encode_buffer(out, out_allocated, in, in_size, aux); - else - out_size = lzfse_decode_buffer(out, out_allocated, in, in_size, aux); - - // If output buffer was too small, grow and retry. - if (out_size == 0 || (op == LZFSE_DECODE && out_size == out_allocated)) { - if (verbosity > 0) - fprintf(stderr, "Output buffer was too small, increasing size...\n"); - out_allocated <<= 1; - out = (uint8_t *)lzfse_reallocf(out, out_allocated); - if (out == 0) { - perror("malloc"); - exit(1); - } - continue; - } - - break; - } - double c1 = get_time(); - - if (verbosity > 0) { - fprintf(stderr, "Output size: %zu B\n", out_size); - size_t raw_size = (op == LZFSE_ENCODE) ? in_size : out_size; - size_t compressed_size = (op == LZFSE_ENCODE) ? out_size : in_size; - fprintf(stderr, "Compression ratio: %.3f\n", - (double)raw_size / (double)compressed_size); - double ns_per_byte = 1.0e9 * (c1 - c0) / (double)raw_size; - double mb_per_s = (double)raw_size / 1024.0 / 1024.0 / (c1 - c0); - fprintf(stderr, "Speed: %.2f ns/B, %.2f MB/s\n",ns_per_byte,mb_per_s); - } - - // Write output - int out_fd = -1; - if (out_file) { -#if defined(_WIN32) - out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, - S_IWRITE); -#else - out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644); -#endif - if (out_fd < 0) { - perror(out_file); - exit(1); - } - } else { - out_fd = 1; // stdout -#if defined(_WIN32) - if (setmode(out_fd, O_BINARY) == -1) { - perror("setmode"); - exit(1); - } -#endif - } - for (size_t out_pos = 0; out_pos < out_size;) { - ptrdiff_t w = write(out_fd, out + out_pos, out_size - out_pos); - if (w < 0) { - perror("write"); - exit(1); - } - if (w == 0) { - fprintf(stderr, "Failed to write to output file\n"); - exit(1); - } - out_pos += (size_t)w; - } - if (out_file != 0) { - close(out_fd); - out_fd = -1; - } - - free(in); - free(out); - free(aux); - return 0; // OK +#define USAGE(argc, argv) \ + do { \ + usage(argc, argv); \ + exit(0); \ + } while (0) +#define USAGE_MSG(argc, argv, ...) \ + do { \ + usage(argc, argv); \ + fprintf(stderr, __VA_ARGS__); \ + exit(1); \ + } while (0) + +int main(int argc, char **argv) +{ + const char *in_file = 0; // stdin + const char *out_file = 0; // stdout + int op = -1; // invalid op + int verbosity = 0; // quiet + + // Parse options + for (int i = 1; i < argc;) { + // no args + const char *a = argv[i++]; + if (strcmp(a, "-h") == 0) + USAGE(argc, argv); + if (strcmp(a, "-v") == 0) { + verbosity++; + continue; + } + if (strcmp(a, "-encode") == 0) { + op = LZFSE_ENCODE; + continue; + } + if (strcmp(a, "-decode") == 0) { + op = LZFSE_DECODE; + continue; + } + + // one arg + const char **arg_var = 0; + if (strcmp(a, "-i") == 0 && in_file == 0) + arg_var = &in_file; + else if (strcmp(a, "-o") == 0 && out_file == 0) + arg_var = &out_file; + if (arg_var != 0) { + // Flag is recognized. Check if there is an argument. 
+ if (i == argc) + USAGE_MSG(argc, argv, + "Error: Missing arg after %s\n", a); + *arg_var = argv[i++]; + continue; + } + + USAGE_MSG(argc, argv, "Error: invalid flag %s\n", a); + } + if (op < 0) + USAGE_MSG(argc, argv, "Error: -encode|-decode required\n"); + + // Info + if (verbosity > 0) { + if (op == LZFSE_ENCODE) + fprintf(stderr, "LZFSE encode\n"); + if (op == LZFSE_DECODE) + fprintf(stderr, "LZFSE decode\n"); + fprintf(stderr, "Input: %s\n", in_file ? in_file : "stdin"); + fprintf(stderr, "Output: %s\n", out_file ? out_file : "stdout"); + } + + // Load input + size_t in_allocated = 0; // allocated in IN + size_t in_size = 0; // used in IN + uint8_t *in = 0; // input buffer + int in_fd = -1; // input file desc + + if (in_file != 0) { + // If we have a file name, open it, and allocate the exact input size + struct stat st; + in_fd = open(in_file, O_RDONLY); + if (in_fd < 0) { + perror(in_file); + exit(1); + } + if (fstat(in_fd, &st) != 0) { + perror(in_file); + exit(1); + } + if (st.st_size > SIZE_MAX) { + fprintf(stderr, "File is too large\n"); + exit(1); + } + in_allocated = (size_t)st.st_size; + } else { + // Otherwise, read from stdin, and allocate to 1 MB, grow as needed + in_allocated = 1 << 20; + in_fd = 0; + } + in = (uint8_t *)malloc(in_allocated); + if (in == 0) { + perror("malloc"); + exit(1); + } + + while (1) { + // re-alloc if needed + if (in_size == in_allocated) { + if (in_allocated < (100 << 20)) + in_allocated <<= 1; // double it + else + in_allocated += + (100 + << 20); // or add 100 MB if already large + in = lzfse_reallocf(in, in_allocated); + if (in == 0) { + perror("malloc"); + exit(1); + } + } + + ptrdiff_t r = read(in_fd, in + in_size, in_allocated - in_size); + if (r < 0) { + perror("read"); + exit(1); + } + if (r == 0) + break; // end of file + in_size += (size_t)r; + } + + if (in_file != 0) { + close(in_fd); + in_fd = -1; + } + + // Size info + if (verbosity > 0) { + fprintf(stderr, "Input size: %zu B\n", in_size); + } + + // Encode/decode + // Compute size for result buffer; we assume here that encode shrinks size, + // and that decode grows by no more than 4x. These are reasonable common- + // case guidelines, but are not formally guaranteed to be satisfied. + size_t out_allocated = (op == LZFSE_ENCODE) ? in_size : (4 * in_size); + size_t out_size = 0; + size_t aux_allocated = (op == LZFSE_ENCODE) ? + lzfse_encode_scratch_size() : + lzfse_decode_scratch_size(); + void *aux = aux_allocated ? malloc(aux_allocated) : 0; + if (aux_allocated != 0 && aux == 0) { + perror("malloc"); + exit(1); + } + uint8_t *out = (uint8_t *)malloc(out_allocated); + if (out == 0) { + perror("malloc"); + exit(1); + } + + double c0 = get_time(); + while (1) { + if (op == LZFSE_ENCODE) + out_size = lzfse_encode_buffer(out, out_allocated, in, + in_size, aux); + else + out_size = lzfse_decode_buffer(out, out_allocated, in, + in_size, aux); + + // If output buffer was too small, grow and retry. + if (out_size == 0 || + (op == LZFSE_DECODE && out_size == out_allocated)) { + if (verbosity > 0) + fprintf(stderr, + "Output buffer was too small, increasing size...\n"); + out_allocated <<= 1; + out = (uint8_t *)lzfse_reallocf(out, out_allocated); + if (out == 0) { + perror("malloc"); + exit(1); + } + continue; + } + + break; + } + double c1 = get_time(); + + if (verbosity > 0) { + fprintf(stderr, "Output size: %zu B\n", out_size); + size_t raw_size = (op == LZFSE_ENCODE) ? in_size : out_size; + size_t compressed_size = (op == LZFSE_ENCODE) ? 
out_size : + in_size; + fprintf(stderr, "Compression ratio: %.3f\n", + (double)raw_size / (double)compressed_size); + double ns_per_byte = 1.0e9 * (c1 - c0) / (double)raw_size; + double mb_per_s = + (double)raw_size / 1024.0 / 1024.0 / (c1 - c0); + fprintf(stderr, "Speed: %.2f ns/B, %.2f MB/s\n", ns_per_byte, + mb_per_s); + } + + // Write output + int out_fd = -1; + if (out_file) { + out_fd = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (out_fd < 0) { + perror(out_file); + exit(1); + } + } else { + out_fd = 1; // stdout + } + for (size_t out_pos = 0; out_pos < out_size;) { + ptrdiff_t w = write(out_fd, out + out_pos, out_size - out_pos); + if (w < 0) { + perror("write"); + exit(1); + } + if (w == 0) { + fprintf(stderr, "Failed to write to output file\n"); + exit(1); + } + out_pos += (size_t)w; + } + if (out_file != 0) { + close(out_fd); + out_fd = -1; + } + + free(in); + free(out); + free(aux); + return 0; // OK } diff --git a/lzfse/lzfse_tunables.h b/lzfse/lzfse_tunables.h index a2a3275..cb699bb 100644 --- a/lzfse/lzfse_tunables.h +++ b/lzfse/lzfse_tunables.h @@ -1,60 +1,50 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ #ifndef LZFSE_TUNABLES_H #define LZFSE_TUNABLES_H -// Parameters controlling details of the LZ-style match search. These values -// may be modified to fine tune compression ratio vs. encoding speed, while -// keeping the compressed format compatible with LZFSE. Note that -// modifying them will also change the amount of work space required by -// the encoder. The values here are those used in the compression library -// on iOS and OS X. - -// Number of bits for hash function to produce. Should be in the range -// [10, 16]. Larger values reduce the number of false-positive found during -// the match search, and expand the history table, which may allow additional -// matches to be found, generally improving the achieved compression ratio. 
-// Larger values also increase the workspace size, and make it less likely -// that the history table will be present in cache, which reduces performance. +/* Parameters controlling details of the LZ-style match search. These values + * may be modified to fine tune compression ratio vs. encoding speed, while + * keeping the compressed format compatible with LZFSE. Note that + * modifying them will also change the amount of work space required by + * the encoder. The values here are those used in the compression library + * on iOS and OS X. + */ + +/* Number of bits for hash function to produce. Should be in the range + * [10, 16]. Larger values reduce the number of false-positive found during + * the match search, and expand the history table, which may allow additional + * matches to be found, generally improving the achieved compression ratio. + * Larger values also increase the workspace size, and make it less likely + * that the history table will be present in cache, which reduces performance. + */ #define LZFSE_ENCODE_HASH_BITS 14 -// Number of positions to store for each line in the history table. May -// be either 4 or 8. Using 8 doubles the size of the history table, which -// increases the chance of finding matches (thus improving compression ratio), -// but also increases the workspace size. +/* Number of positions to store for each line in the history table. May + * be either 4 or 8. Using 8 doubles the size of the history table, which + * increases the chance of finding matches (thus improving compression ratio), + * but also increases the workspace size. + */ #define LZFSE_ENCODE_HASH_WIDTH 4 -// Match length in bytes to cause immediate emission. Generally speaking, -// LZFSE maintains multiple candidate matches and waits to decide which match -// to emit until more information is available. When a match exceeds this -// threshold, it is emitted immediately. Thus, smaller values may give -// somewhat better performance, and larger values may give somewhat better -// compression ratios. +/* Match length in bytes to cause immediate emission. Generally speaking, + * LZFSE maintains multiple candidate matches and waits to decide which match + * to emit until more information is available. When a match exceeds this + * threshold, it is emitted immediately. Thus, smaller values may give + * somewhat better performance, and larger values may give somewhat better + * compression ratios. + */ #define LZFSE_ENCODE_GOOD_MATCH 40 -// When the source buffer is very small, LZFSE doesn't compress as well as -// some simpler algorithms. To maintain reasonable compression for these -// cases, we transition to use LZVN instead if the size of the source buffer -// is below this threshold. +/* When the source buffer is very small, LZFSE doesn't compress as well as + * some simpler algorithms. To maintain reasonable compression for these + * cases, we transition to use LZVN instead if the size of the source buffer + * is below this threshold. + */ #define LZFSE_ENCODE_LZVN_THRESHOLD 4096 #endif // LZFSE_TUNABLES_H diff --git a/lzfse/lzvn_decode_base.c b/lzfse/lzvn_decode_base.c index 77e4199..632bdd6 100644 --- a/lzfse/lzvn_decode_base.c +++ b/lzfse/lzvn_decode_base.c @@ -1,25 +1,11 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. 
+ * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// LZVN low-level decoder +/* LZVN low-level decoder + */ #include "lzvn_decode_base.h" @@ -31,691 +17,705 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSI # endif #endif -// Both the source and destination buffers are represented by a pointer and -// a length; they are *always* updated in concert using this macro; however -// many bytes the pointer is advanced, the length is decremented by the same -// amount. Thus, pointer + length always points to the byte one past the end -// of the buffer. -#define PTR_LEN_INC(_pointer, _length, _increment) \ - (_pointer += _increment, _length -= _increment) +/* Both the source and destination buffers are represented by a pointer and + * a length; they are *always* updated in concert using this macro; however + * many bytes the pointer is advanced, the length is decremented by the same + * amount. Thus, pointer + length always points to the byte one past the end + * of the buffer. 
+ */ +#define PTR_LEN_INC(_pointer, _length, _increment) \ + (_pointer += _increment, _length -= _increment) -// Update state with current positions and distance, corresponding to the -// beginning of an instruction in both streams -#define UPDATE_GOOD \ - (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D) +/* Update state with current positions and distance, corresponding to the + * beginning of an instruction in both streams + */ +#define UPDATE_GOOD \ + (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D) -void lzvn_decode(lzvn_decoder_state *state) { +void lzvn_decode(lzvn_decoder_state *state) +{ #if HAVE_LABELS_AS_VALUES - // Jump table for all instructions - static const void *opc_tbl[256] = { - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, - &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, - &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, - &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, - &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, - &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, - &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, - &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, - &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, - &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, - &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m}; + // Jump table for all instructions + static const void *opc_tbl[256] = { + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos, + &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&nop, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&nop, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&sml_d, 
&&udef, &&lrg_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, &&lrg_d, + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef, + &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, + &&udef, &&udef, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&med_d, + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, + &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, + &&med_d, &&med_d, &&med_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&sml_d, &&sml_d, &&sml_d, + &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d, &&udef, &&udef, + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, + &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, &&udef, + &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, + &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, + &&sml_l, &&sml_l, &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, + &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, + &&sml_m, &&sml_m, &&sml_m, &&sml_m + }; #endif - size_t src_len = state->src_end - state->src; - size_t dst_len = state->dst_end - state->dst; - const unsigned char *src_ptr = state->src; - unsigned char *dst_ptr = state->dst; - size_t D = state->d_prev; - size_t M; - size_t L; - size_t opc_len; - unsigned char opc; - uint16_t opc23; + size_t src_len = state->src_end - state->src; + size_t dst_len = state->dst_end - state->dst; + const unsigned char *src_ptr = state->src; + unsigned char *dst_ptr = state->dst; + size_t D = state->d_prev; + size_t M; + size_t L; + size_t opc_len; + unsigned char opc; + uint16_t opc23; - if (src_len == 0 || dst_len == 0) - return; // empty buffer + if (src_len == 0 || dst_len == 0) + return; // empty buffer - // Do we have a partially expanded match saved in state? - if (state->L != 0 || state->M != 0) { - L = state->L; - M = state->M; - D = state->D; - opc_len = 0; // we already skipped the op - state->L = state->M = state->D = 0; - if (M == 0) - goto copy_literal; - if (L == 0) - goto copy_match; - goto copy_literal_and_match; - } + // Do we have a partially expanded match saved in state? 
+ if (state->L != 0 || state->M != 0) { + L = state->L; + M = state->M; + D = state->D; + opc_len = 0; // we already skipped the op + state->L = state->M = state->D = 0; + if (M == 0) + goto copy_literal; + if (L == 0) + goto copy_match; + goto copy_literal_and_match; + } - opc = src_ptr[0]; + opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES - goto *opc_tbl[opc]; + goto *opc_tbl[opc]; #else - for (;;) { - switch (opc) { + for (;;) { + switch (opc) { #endif -// =============================================================== -// These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a -// literal and a match. The bulk of their implementations are shared; -// each label here only does the work of setting the opcode length (not -// including any literal bytes), and extracting the literal length, match -// length, and match distance (except in pre_d). They then jump into the -// shared implementation to actually output the literal and match bytes. -// -// No error checking happens in the first stage, except for ensuring that -// the source has enough length to represent the full opcode before -// reading past the first byte. +/* =============================================================== + * These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a + * literal and a match. The bulk of their implementations are shared; + * each label here only does the work of setting the opcode length (not + * including any literal bytes), and extracting the literal length, match + * length, and match distance (except in pre_d). They then jump into the + * shared implementation to actually output the literal and match bytes. + * + * No error checking happens in the first stage, except for ensuring that + * the source has enough length to represent the full opcode before + * reading past the first byte. 
+ */ sml_d: #if !HAVE_LABELS_AS_VALUES - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 8: - case 9: - case 10: - case 11: - case 12: - case 13: - case 16: - case 17: - case 18: - case 19: - case 20: - case 21: - case 24: - case 25: - case 26: - case 27: - case 28: - case 29: - case 32: - case 33: - case 34: - case 35: - case 36: - case 37: - case 40: - case 41: - case 42: - case 43: - case 44: - case 45: - case 48: - case 49: - case 50: - case 51: - case 52: - case 53: - case 56: - case 57: - case 58: - case 59: - case 60: - case 61: - case 64: - case 65: - case 66: - case 67: - case 68: - case 69: - case 72: - case 73: - case 74: - case 75: - case 76: - case 77: - case 80: - case 81: - case 82: - case 83: - case 84: - case 85: - case 88: - case 89: - case 90: - case 91: - case 92: - case 93: - case 96: - case 97: - case 98: - case 99: - case 100: - case 101: - case 104: - case 105: - case 106: - case 107: - case 108: - case 109: - case 128: - case 129: - case 130: - case 131: - case 132: - case 133: - case 136: - case 137: - case 138: - case 139: - case 140: - case 141: - case 144: - case 145: - case 146: - case 147: - case 148: - case 149: - case 152: - case 153: - case 154: - case 155: - case 156: - case 157: - case 192: - case 193: - case 194: - case 195: - case 196: - case 197: - case 200: - case 201: - case 202: - case 203: - case 204: - case 205: +case 0: +case 1: +case 2: +case 3: +case 4: +case 5: +case 8: +case 9: +case 10: +case 11: +case 12: +case 13: +case 16: +case 17: +case 18: +case 19: +case 20: +case 21: +case 24: +case 25: +case 26: +case 27: +case 28: +case 29: +case 32: +case 33: +case 34: +case 35: +case 36: +case 37: +case 40: +case 41: +case 42: +case 43: +case 44: +case 45: +case 48: +case 49: +case 50: +case 51: +case 52: +case 53: +case 56: +case 57: +case 58: +case 59: +case 60: +case 61: +case 64: +case 65: +case 66: +case 67: +case 68: +case 69: +case 72: +case 73: +case 74: +case 75: +case 76: +case 77: +case 80: +case 81: +case 82: +case 83: +case 84: +case 85: +case 88: +case 89: +case 90: +case 91: +case 92: +case 93: +case 96: +case 97: +case 98: +case 99: +case 100: +case 101: +case 104: +case 105: +case 106: +case 107: +case 108: +case 109: +case 128: +case 129: +case 130: +case 131: +case 132: +case 133: +case 136: +case 137: +case 138: +case 139: +case 140: +case 141: +case 144: +case 145: +case 146: +case 147: +case 148: +case 149: +case 152: +case 153: +case 154: +case 155: +case 156: +case 157: +case 192: +case 193: +case 194: +case 195: +case 196: +case 197: +case 200: +case 201: +case 202: +case 203: +case 204: +case 205: #endif - UPDATE_GOOD; - // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL - // where the length of literal (0-3 bytes) is encoded by the high 2 bits of - // the first byte. We first extract the literal length so we know how long - // the opcode is, then check that the source can hold both this opcode and - // at least one byte of the next (because any valid input stream must be - // terminated with an eos token). - opc_len = 2; - L = (size_t)extract(opc, 6, 2); - M = (size_t)extract(opc, 3, 3) + 3; - // We need to ensure that the source buffer is long enough that we can - // safely read this entire opcode, the literal that follows, and the first - // byte of the next opcode. Once we satisfy this requirement, we can - // safely unpack the match distance. A check similar to this one is - // present in all the opcode implementations. 
- if (src_len <= opc_len + L) - return; // source truncated - D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1]; - goto copy_literal_and_match; + UPDATE_GOOD; + // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL + // where the length of literal (0-3 bytes) is encoded by the high 2 bits of + // the first byte. We first extract the literal length so we know how long + // the opcode is, then check that the source can hold both this opcode and + // at least one byte of the next (because any valid input stream must be + // terminated with an eos token). + opc_len = 2; + L = (size_t)extract(opc, 6, 2); + M = (size_t)extract(opc, 3, 3) + 3; + // We need to ensure that the source buffer is long enough that we can + // safely read this entire opcode, the literal that follows, and the first + // byte of the next opcode. Once we satisfy this requirement, we can + // safely unpack the match distance. A check similar to this one is + // present in all the opcode implementations. + if (src_len <= opc_len + L) + return; // source truncated + D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1]; + goto copy_literal_and_match; med_d: #if !HAVE_LABELS_AS_VALUES - case 160: - case 161: - case 162: - case 163: - case 164: - case 165: - case 166: - case 167: - case 168: - case 169: - case 170: - case 171: - case 172: - case 173: - case 174: - case 175: - case 176: - case 177: - case 178: - case 179: - case 180: - case 181: - case 182: - case 183: - case 184: - case 185: - case 186: - case 187: - case 188: - case 189: - case 190: - case 191: +case 160: +case 161: +case 162: +case 163: +case 164: +case 165: +case 166: +case 167: +case 168: +case 169: +case 170: +case 171: +case 172: +case 173: +case 174: +case 175: +case 176: +case 177: +case 178: +case 179: +case 180: +case 181: +case 182: +case 183: +case 184: +case 185: +case 186: +case 187: +case 188: +case 189: +case 190: +case 191: #endif - UPDATE_GOOD; - // "medium distance": This is a minor variant of the "small distance" - // encoding, where we will now use two extra bytes instead of one to encode - // the restof the match length and distance. This allows an extra two bits - // for the match length, and an extra three bits for the match distance. The - // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL. - opc_len = 3; - L = (size_t)extract(opc, 3, 2); - if (src_len <= opc_len + L) - return; // source truncated - opc23 = load2(&src_ptr[1]); - M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3); - D = (size_t)extract(opc23, 2, 14); - goto copy_literal_and_match; + UPDATE_GOOD; + // "medium distance": This is a minor variant of the "small distance" + // encoding, where we will now use two extra bytes instead of one to encode + // the restof the match length and distance. This allows an extra two bits + // for the match length, and an extra three bits for the match distance. The + // full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL. 
+ opc_len = 3; + L = (size_t)extract(opc, 3, 2); + if (src_len <= opc_len + L) + return; // source truncated + opc23 = load2(&src_ptr[1]); + M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3); + D = (size_t)extract(opc23, 2, 14); + goto copy_literal_and_match; lrg_d: #if !HAVE_LABELS_AS_VALUES - case 7: - case 15: - case 23: - case 31: - case 39: - case 47: - case 55: - case 63: - case 71: - case 79: - case 87: - case 95: - case 103: - case 111: - case 135: - case 143: - case 151: - case 159: - case 199: - case 207: +case 7: +case 15: +case 23: +case 31: +case 39: +case 47: +case 55: +case 63: +case 71: +case 79: +case 87: +case 95: +case 103: +case 111: +case 135: +case 143: +case 151: +case 159: +case 199: +case 207: #endif - UPDATE_GOOD; - // "large distance": This is another variant of the "small distance" - // encoding, where we will now use two extra bytes to encode the match - // distance, which allows distances up to 65535 to be represented. The full - // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL. - opc_len = 3; - L = (size_t)extract(opc, 6, 2); - M = (size_t)extract(opc, 3, 3) + 3; - if (src_len <= opc_len + L) - return; // source truncated - D = load2(&src_ptr[1]); - goto copy_literal_and_match; + UPDATE_GOOD; + // "large distance": This is another variant of the "small distance" + // encoding, where we will now use two extra bytes to encode the match + // distance, which allows distances up to 65535 to be represented. The full + // structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL. + opc_len = 3; + L = (size_t)extract(opc, 6, 2); + M = (size_t)extract(opc, 3, 3) + 3; + if (src_len <= opc_len + L) + return; // source truncated + D = load2(&src_ptr[1]); + goto copy_literal_and_match; pre_d: #if !HAVE_LABELS_AS_VALUES - case 70: - case 78: - case 86: - case 94: - case 102: - case 110: - case 134: - case 142: - case 150: - case 158: - case 198: - case 206: +case 70: +case 78: +case 86: +case 94: +case 102: +case 110: +case 134: +case 142: +case 150: +case 158: +case 198: +case 206: #endif - UPDATE_GOOD; - // "previous distance": This opcode has the structure LLMMM110, where the - // length of the literal (0-3 bytes) is encoded by the high 2 bits of the - // first byte. We first extract the literal length so we know how long - // the opcode is, then check that the source can hold both this opcode and - // at least one byte of the next (because any valid input stream must be - // terminated with an eos token). - opc_len = 1; - L = (size_t)extract(opc, 6, 2); - M = (size_t)extract(opc, 3, 3) + 3; - if (src_len <= opc_len + L) - return; // source truncated - goto copy_literal_and_match; + UPDATE_GOOD; + // "previous distance": This opcode has the structure LLMMM110, where the + // length of the literal (0-3 bytes) is encoded by the high 2 bits of the + // first byte. We first extract the literal length so we know how long + // the opcode is, then check that the source can hold both this opcode and + // at least one byte of the next (because any valid input stream must be + // terminated with an eos token). + opc_len = 1; + L = (size_t)extract(opc, 6, 2); + M = (size_t)extract(opc, 3, 3) + 3; + if (src_len <= opc_len + L) + return; // source truncated + goto copy_literal_and_match; copy_literal_and_match: - // Common implementation of writing data for opcodes that have both a - // literal and a match. 
We begin by advancing the source pointer past - // the opcode, so that it points at the first literal byte (if L - // is non-zero; otherwise it points at the next opcode). - PTR_LEN_INC(src_ptr, src_len, opc_len); - // Now we copy the literal from the source pointer to the destination. - if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) { - // The literal is 0-3 bytes; if we are not near the end of the buffer, - // we can safely just do a 4 byte copy (which is guaranteed to cover - // the complete literal, and may include some other bytes as well). - store4(dst_ptr, load4(src_ptr)); - } else if (L <= dst_len) { - // We are too close to the end of either the input or output stream - // to be able to safely use a four-byte copy, but we will not exhaust - // either stream (we already know that the source will not be - // exhausted from checks in the individual opcode implementations, - // and we just tested that dst_len > L). Thus, we need to do a - // byte-by-byte copy of the literal. This is slow, but it can only ever - // happen near the very end of a buffer, so it is not an important case to - // optimize. - size_t i; - for (i = 0; i < L; ++i) - dst_ptr[i] = src_ptr[i]; - } else { - // Destination truncated: fill DST, and store partial match + // Common implementation of writing data for opcodes that have both a + // literal and a match. We begin by advancing the source pointer past + // the opcode, so that it points at the first literal byte (if L + // is non-zero; otherwise it points at the next opcode). + PTR_LEN_INC(src_ptr, src_len, opc_len); + // Now we copy the literal from the source pointer to the destination. + if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) { + // The literal is 0-3 bytes; if we are not near the end of the buffer, + // we can safely just do a 4 byte copy (which is guaranteed to cover + // the complete literal, and may include some other bytes as well). + store4(dst_ptr, load4(src_ptr)); + } else if (L <= dst_len) { + // We are too close to the end of either the input or output stream + // to be able to safely use a four-byte copy, but we will not exhaust + // either stream (we already know that the source will not be + // exhausted from checks in the individual opcode implementations, + // and we just tested that dst_len > L). Thus, we need to do a + // byte-by-byte copy of the literal. This is slow, but it can only ever + // happen near the very end of a buffer, so it is not an important case to + // optimize. + size_t i; + for (i = 0; i < L; ++i) + dst_ptr[i] = src_ptr[i]; + } else { + // Destination truncated: fill DST, and store partial match - // Copy partial literal - size_t i; - for (i = 0; i < dst_len; ++i) - dst_ptr[i] = src_ptr[i]; - // Save state - state->src = src_ptr + dst_len; - state->dst = dst_ptr + dst_len; - state->L = L - dst_len; - state->M = M; - state->D = D; - return; // destination truncated - } - // Having completed the copy of the literal, we advance both the source - // and destination pointers by the number of literal bytes. - PTR_LEN_INC(dst_ptr, dst_len, L); - PTR_LEN_INC(src_ptr, src_len, L); - // Check if the match distance is valid; matches must not reference - // bytes that preceed the start of the output buffer, nor can the match - // distance be zero. 
- if (D > dst_ptr - state->dst_begin || D == 0) - goto invalid_match_distance; + // Copy partial literal + size_t i; + for (i = 0; i < dst_len; ++i) + dst_ptr[i] = src_ptr[i]; + // Save state + state->src = src_ptr + dst_len; + state->dst = dst_ptr + dst_len; + state->L = L - dst_len; + state->M = M; + state->D = D; + return; // destination truncated + } + // Having completed the copy of the literal, we advance both the source + // and destination pointers by the number of literal bytes. + PTR_LEN_INC(dst_ptr, dst_len, L); + PTR_LEN_INC(src_ptr, src_len, L); + // Check if the match distance is valid; matches must not reference + // bytes that preceed the start of the output buffer, nor can the match + // distance be zero. + if (D > dst_ptr - state->dst_begin || D == 0) + goto invalid_match_distance; copy_match: - // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep - // in mind that we may have D < M, in which case the source and destination - // windows overlap in the copy. The semantics of the match copy are *not* - // those of memmove( ); if the buffers overlap it needs to behave as though - // we were copying byte-by-byte in increasing address order. If, for example, - // D is 1, the copy operation is equivalent to: - // - // memset(dst_ptr, dst_ptr[-1], M); - // - // i.e. it splats the previous byte. This means that we need to be very - // careful about using wide loads or stores to perform the copy operation. - if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) { - // We are not near the end of the buffer, and the match distance - // is at least eight. Thus, we can safely loop using eight byte - // copies. The last of these may slop over the intended end of - // the match, but this is OK because we know we have a safety bound - // away from the end of the destination buffer. - size_t i; - for (i = 0; i < M; i += 8) - store8(&dst_ptr[i], load8(&dst_ptr[i - D])); - } else if (M <= dst_len) { - // Either the match distance is too small, or we are too close to - // the end of the buffer to safely use eight byte copies. Fall back - // on a simple byte-by-byte implementation. - size_t i; - for (i = 0; i < M; ++i) - dst_ptr[i] = dst_ptr[i - D]; - } else { - // Destination truncated: fill DST, and store partial match + // Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep + // in mind that we may have D < M, in which case the source and destination + // windows overlap in the copy. The semantics of the match copy are *not* + // those of memmove( ); if the buffers overlap it needs to behave as though + // we were copying byte-by-byte in increasing address order. If, for example, + // D is 1, the copy operation is equivalent to: + // + // memset(dst_ptr, dst_ptr[-1], M); + // + // i.e. it splats the previous byte. This means that we need to be very + // careful about using wide loads or stores to perform the copy operation. + if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) { + // We are not near the end of the buffer, and the match distance + // is at least eight. Thus, we can safely loop using eight byte + // copies. The last of these may slop over the intended end of + // the match, but this is OK because we know we have a safety bound + // away from the end of the destination buffer. + size_t i; + for (i = 0; i < M; i += 8) + store8(&dst_ptr[i], load8(&dst_ptr[i - D])); + } else if (M <= dst_len) { + // Either the match distance is too small, or we are too close to + // the end of the buffer to safely use eight byte copies. 
Fall back + // on a simple byte-by-byte implementation. + size_t i; + for (i = 0; i < M; ++i) + dst_ptr[i] = dst_ptr[i - D]; + } else { + // Destination truncated: fill DST, and store partial match - // Copy partial match - size_t i; - for (i = 0; i < dst_len; ++i) - dst_ptr[i] = dst_ptr[i - D]; - // Save state - state->src = src_ptr; - state->dst = dst_ptr + dst_len; - state->L = 0; - state->M = M - dst_len; - state->D = D; - return; // destination truncated - } - // Update the destination pointer and length to account for the bytes - // written by the match, then load the next opcode byte and branch to - // the appropriate implementation. - PTR_LEN_INC(dst_ptr, dst_len, M); - opc = src_ptr[0]; + // Copy partial match + size_t i; + for (i = 0; i < dst_len; ++i) + dst_ptr[i] = dst_ptr[i - D]; + // Save state + state->src = src_ptr; + state->dst = dst_ptr + dst_len; + state->L = 0; + state->M = M - dst_len; + state->D = D; + return; // destination truncated + } + // Update the destination pointer and length to account for the bytes + // written by the match, then load the next opcode byte and branch to + // the appropriate implementation. + PTR_LEN_INC(dst_ptr, dst_len, M); + opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES - goto *opc_tbl[opc]; + goto *opc_tbl[opc]; #else - break; + break; #endif -// =============================================================== -// Opcodes representing only a match (no literal). -// These two opcodes (lrg_m and sml_m) encode only a match. The match -// distance is carried over from the previous opcode, so all they need -// to encode is the match length. We are able to reuse the match copy -// sequence from the literal and match opcodes to perform the actual -// copy implementation. +/* =============================================================== + * Opcodes representing only a match (no literal). + * These two opcodes (lrg_m and sml_m) encode only a match. The match + * distance is carried over from the previous opcode, so all they need + * to encode is the match length. We are able to reuse the match copy + * sequence from the literal and match opcodes to perform the actual + * copy implementation. + */ sml_m: #if !HAVE_LABELS_AS_VALUES - case 241: - case 242: - case 243: - case 244: - case 245: - case 246: - case 247: - case 248: - case 249: - case 250: - case 251: - case 252: - case 253: - case 254: - case 255: +case 241: +case 242: +case 243: +case 244: +case 245: +case 246: +case 247: +case 248: +case 249: +case 250: +case 251: +case 252: +case 253: +case 254: +case 255: #endif - UPDATE_GOOD; - // "small match": This opcode has no literal, and uses the previous match - // distance (i.e. it encodes only the match length), in a single byte as - // 1111MMMM. - opc_len = 1; - if (src_len <= opc_len) - return; // source truncated - M = (size_t)extract(opc, 0, 4); - PTR_LEN_INC(src_ptr, src_len, opc_len); - goto copy_match; + UPDATE_GOOD; + // "small match": This opcode has no literal, and uses the previous match + // distance (i.e. it encodes only the match length), in a single byte as + // 1111MMMM. + opc_len = 1; + if (src_len <= opc_len) + return; // source truncated + M = (size_t)extract(opc, 0, 4); + PTR_LEN_INC(src_ptr, src_len, opc_len); + goto copy_match; lrg_m: #if !HAVE_LABELS_AS_VALUES - case 240: +case 240: #endif - UPDATE_GOOD; - // "large match": This opcode has no literal, and uses the previous match - // distance (i.e. it encodes only the match length). It is encoded in two - // bytes as 11110000 MMMMMMMM. 
Because matches smaller than 16 bytes can - // be represented by sml_m, there is an implicit bias of 16 on the match - // length; the representable values are [16,271]. - opc_len = 2; - if (src_len <= opc_len) - return; // source truncated - M = src_ptr[1] + 16; - PTR_LEN_INC(src_ptr, src_len, opc_len); - goto copy_match; + UPDATE_GOOD; + // "large match": This opcode has no literal, and uses the previous match + // distance (i.e. it encodes only the match length). It is encoded in two + // bytes as 11110000 MMMMMMMM. Because matches smaller than 16 bytes can + // be represented by sml_m, there is an implicit bias of 16 on the match + // length; the representable values are [16,271]. + opc_len = 2; + if (src_len <= opc_len) + return; // source truncated + M = src_ptr[1] + 16; + PTR_LEN_INC(src_ptr, src_len, opc_len); + goto copy_match; -// =============================================================== -// Opcodes representing only a literal (no match). -// These two opcodes (lrg_l and sml_l) encode only a literal. There is no -// match length or match distance to worry about (but we need to *not* -// touch D, as it must be preserved between opcodes). +/* =============================================================== + * Opcodes representing only a literal (no match). + * These two opcodes (lrg_l and sml_l) encode only a literal. There is no + * match length or match distance to worry about (but we need to *not* + * touch D, as it must be preserved between opcodes). + */ sml_l: #if !HAVE_LABELS_AS_VALUES - case 225: - case 226: - case 227: - case 228: - case 229: - case 230: - case 231: - case 232: - case 233: - case 234: - case 235: - case 236: - case 237: - case 238: - case 239: +case 225: +case 226: +case 227: +case 228: +case 229: +case 230: +case 231: +case 232: +case 233: +case 234: +case 235: +case 236: +case 237: +case 238: +case 239: #endif - UPDATE_GOOD; - // "small literal": This opcode has no match, and encodes only a literal - // of length up to 15 bytes. The format is 1110LLLL LITERAL. - opc_len = 1; - L = (size_t)extract(opc, 0, 4); - goto copy_literal; + UPDATE_GOOD; + // "small literal": This opcode has no match, and encodes only a literal + // of length up to 15 bytes. The format is 1110LLLL LITERAL. + opc_len = 1; + L = (size_t)extract(opc, 0, 4); + goto copy_literal; lrg_l: #if !HAVE_LABELS_AS_VALUES - case 224: +case 224: #endif - UPDATE_GOOD; - // "large literal": This opcode has no match, and uses the previous match - // distance (i.e. it encodes only the match length). It is encoded in two - // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16 - // bytes can be represented by sml_l, there is an implicit bias of 16 on - // the literal length; the representable values are [16,271]. - opc_len = 2; - if (src_len <= 2) - return; // source truncated - L = src_ptr[1] + 16; - goto copy_literal; + UPDATE_GOOD; + // "large literal": This opcode has no match, and uses the previous match + // distance (i.e. it encodes only the match length). It is encoded in two + // bytes as 11100000 LLLLLLLL LITERAL. Because literals smaller than 16 + // bytes can be represented by sml_l, there is an implicit bias of 16 on + // the literal length; the representable values are [16,271]. + opc_len = 2; + if (src_len <= 2) + return; // source truncated + L = src_ptr[1] + 16; + goto copy_literal; copy_literal: - // Check that the source buffer is large enough to hold the complete - // literal and at least the first byte of the next opcode. 
If so, advance - // the source pointer to point to the first byte of the literal and adjust - // the source length accordingly. - if (src_len <= opc_len + L) - return; // source truncated - PTR_LEN_INC(src_ptr, src_len, opc_len); - // Now we copy the literal from the source pointer to the destination. - if (dst_len >= L + 7 && src_len >= L + 7) { - // We are not near the end of the source or destination buffers; thus - // we can safely copy the literal using wide copies, without worrying - // about reading or writing past the end of either buffer. - size_t i; - for (i = 0; i < L; i += 8) - store8(&dst_ptr[i], load8(&src_ptr[i])); - } else if (L <= dst_len) { - // We are too close to the end of either the input or output stream - // to be able to safely use an eight-byte copy. Instead we copy the - // literal byte-by-byte. - size_t i; - for (i = 0; i < L; ++i) - dst_ptr[i] = src_ptr[i]; - } else { - // Destination truncated: fill DST, and store partial match + // Check that the source buffer is large enough to hold the complete + // literal and at least the first byte of the next opcode. If so, advance + // the source pointer to point to the first byte of the literal and adjust + // the source length accordingly. + if (src_len <= opc_len + L) + return; // source truncated + PTR_LEN_INC(src_ptr, src_len, opc_len); + // Now we copy the literal from the source pointer to the destination. + if (dst_len >= L + 7 && src_len >= L + 7) { + // We are not near the end of the source or destination buffers; thus + // we can safely copy the literal using wide copies, without worrying + // about reading or writing past the end of either buffer. + size_t i; + for (i = 0; i < L; i += 8) + store8(&dst_ptr[i], load8(&src_ptr[i])); + } else if (L <= dst_len) { + // We are too close to the end of either the input or output stream + // to be able to safely use an eight-byte copy. Instead we copy the + // literal byte-by-byte. + size_t i; + for (i = 0; i < L; ++i) + dst_ptr[i] = src_ptr[i]; + } else { + // Destination truncated: fill DST, and store partial match - // Copy partial literal - size_t i; - for (i = 0; i < dst_len; ++i) - dst_ptr[i] = src_ptr[i]; - // Save state - state->src = src_ptr + dst_len; - state->dst = dst_ptr + dst_len; - state->L = L - dst_len; - state->M = 0; - state->D = D; - return; // destination truncated - } - // Having completed the copy of the literal, we advance both the source - // and destination pointers by the number of literal bytes. - PTR_LEN_INC(dst_ptr, dst_len, L); - PTR_LEN_INC(src_ptr, src_len, L); - // Load the first byte of the next opcode, and jump to its implementation. - opc = src_ptr[0]; + // Copy partial literal + size_t i; + for (i = 0; i < dst_len; ++i) + dst_ptr[i] = src_ptr[i]; + // Save state + state->src = src_ptr + dst_len; + state->dst = dst_ptr + dst_len; + state->L = L - dst_len; + state->M = 0; + state->D = D; + return; // destination truncated + } + // Having completed the copy of the literal, we advance both the source + // and destination pointers by the number of literal bytes. + PTR_LEN_INC(dst_ptr, dst_len, L); + PTR_LEN_INC(src_ptr, src_len, L); + // Load the first byte of the next opcode, and jump to its implementation. 
+ opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES - goto *opc_tbl[opc]; + goto *opc_tbl[opc]; #else - break; + break; #endif -// =============================================================== -// Other opcodes +/* =============================================================== + * Other opcodes + */ nop: #if !HAVE_LABELS_AS_VALUES - case 14: - case 22: +case 14: +case 22: #endif - UPDATE_GOOD; - opc_len = 1; - if (src_len <= opc_len) - return; // source truncated - PTR_LEN_INC(src_ptr, src_len, opc_len); - opc = src_ptr[0]; + UPDATE_GOOD; + opc_len = 1; + if (src_len <= opc_len) + return; // source truncated + PTR_LEN_INC(src_ptr, src_len, opc_len); + opc = src_ptr[0]; #if HAVE_LABELS_AS_VALUES - goto *opc_tbl[opc]; + goto *opc_tbl[opc]; #else - break; + break; #endif eos: #if !HAVE_LABELS_AS_VALUES - case 6: +case 6: #endif - opc_len = 8; - if (src_len < opc_len) - return; // source truncated (here we don't need an extra byte for next op - // code) - PTR_LEN_INC(src_ptr, src_len, opc_len); - state->end_of_stream = 1; - UPDATE_GOOD; - return; // end-of-stream + opc_len = 8; + if (src_len < opc_len) + return; // source truncated (here we don't need an extra byte for next op + // code) + PTR_LEN_INC(src_ptr, src_len, opc_len); + state->end_of_stream = 1; + UPDATE_GOOD; + return; // end-of-stream -// =============================================================== -// Return on error +/* =============================================================== + * Return on error + */ udef: #if !HAVE_LABELS_AS_VALUES - case 30: - case 38: - case 46: - case 54: - case 62: - case 112: - case 113: - case 114: - case 115: - case 116: - case 117: - case 118: - case 119: - case 120: - case 121: - case 122: - case 123: - case 124: - case 125: - case 126: - case 127: - case 208: - case 209: - case 210: - case 211: - case 212: - case 213: - case 214: - case 215: - case 216: - case 217: - case 218: - case 219: - case 220: - case 221: - case 222: - case 223: +case 30: +case 38: +case 46: +case 54: +case 62: +case 112: +case 113: +case 114: +case 115: +case 116: +case 117: +case 118: +case 119: +case 120: +case 121: +case 122: +case 123: +case 124: +case 125: +case 126: +case 127: +case 208: +case 209: +case 210: +case 211: +case 212: +case 213: +case 214: +case 215: +case 216: +case 217: +case 218: +case 219: +case 220: +case 221: +case 222: +case 223: #endif invalid_match_distance: - return; // we already updated state + return; // we already updated state #if !HAVE_LABELS_AS_VALUES - } - } +} +} #endif } diff --git a/lzfse/lzvn_decode_base.h b/lzfse/lzvn_decode_base.h index 1262ead..ef6b024 100644 --- a/lzfse/lzvn_decode_base.h +++ b/lzfse/lzvn_decode_base.h @@ -1,27 +1,13 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. 
Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -// LZVN low-level decoder (v2) -// Functions in the low-level API should switch to these at some point. -// Apr 2014 +/* LZVN low-level decoder (v2) + * Functions in the low-level API should switch to these at some point. + * Apr 2014 + */ #ifndef LZVN_DECODE_BASE_H #define LZVN_DECODE_BASE_H @@ -30,34 +16,33 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSI /*! @abstract Base decoder state. */ typedef struct { + // Decoder I/O - // Decoder I/O - - // Next byte to read in source buffer - const unsigned char *src; - // Next byte after source buffer - const unsigned char *src_end; + // Next byte to read in source buffer + const unsigned char *src; + // Next byte after source buffer + const unsigned char *src_end; - // Next byte to write in destination buffer (by decoder) - unsigned char *dst; - // Valid range for destination buffer is [dst_begin, dst_end - 1] - unsigned char *dst_begin; - unsigned char *dst_end; - // Next byte to read in destination buffer (modified by caller) - unsigned char *dst_current; + // Next byte to write in destination buffer (by decoder) + unsigned char *dst; + // Valid range for destination buffer is [dst_begin, dst_end - 1] + unsigned char *dst_begin; + unsigned char *dst_end; + // Next byte to read in destination buffer (modified by caller) + unsigned char *dst_current; - // Decoder state + // Decoder state - // Partially expanded match, or 0,0,0. - // In that case, src points to the next literal to copy, or the next op-code - // if L==0. - size_t L, M, D; + // Partially expanded match, or 0,0,0. + // In that case, src points to the next literal to copy, or the next op-code + // if L==0. + size_t L, M, D; - // Distance for last emitted match, or 0 - lzvn_offset d_prev; + // Distance for last emitted match, or 0 + lzvn_offset d_prev; - // Did we decode end-of-stream? - int end_of_stream; + // Did we decode end-of-stream? + int end_of_stream; } lzvn_decoder_state; diff --git a/lzfse/lzvn_encode_base.c b/lzfse/lzvn_encode_base.c index c86b551..ee40a23 100644 --- a/lzfse/lzvn_encode_base.c +++ b/lzfse/lzvn_encode_base.c @@ -1,55 +1,40 @@ +// SPDX-License-Identifier: BSD-3-Clause /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -// LZVN low-level encoder +/* LZVN low-level encoder + */ #include "lzvn_encode_base.h" -#if defined(_MSC_VER) && !defined(__clang__) -# define restrict __restrict -#endif - -// =============================================================== -// Coarse/fine copy, non overlapping buffers +/* =============================================================== + * Coarse/fine copy, non overlapping buffers + */ /*! @abstract Copy at least \p nbytes bytes from \p src to \p dst, by blocks * of 8 bytes (may go beyond range). No overlap. * @return \p dst + \p nbytes. */ static inline unsigned char *lzvn_copy64(unsigned char *restrict dst, - const unsigned char *restrict src, - size_t nbytes) { - for (size_t i = 0; i < nbytes; i += 8) - store8(dst + i, load8(src + i)); - return dst + nbytes; + const unsigned char *restrict src, + size_t nbytes) +{ + for (size_t i = 0; i < nbytes; i += 8) + store8(dst + i, load8(src + i)); + return dst + nbytes; } /*! @abstract Copy exactly \p nbytes bytes from \p src to \p dst (respects range). * No overlap. * @return \p dst + \p nbytes. */ static inline unsigned char *lzvn_copy8(unsigned char *restrict dst, - const unsigned char *restrict src, - size_t nbytes) { - for (size_t i = 0; i < nbytes; i++) - dst[i] = src[i]; - return dst + nbytes; + const unsigned char *restrict src, + size_t nbytes) +{ + for (size_t i = 0; i < nbytes; i++) + dst[i] = src[i]; + return dst + nbytes; } /*! @abstract Emit (L,0,0) instructions (final literal). @@ -60,29 +45,30 @@ static inline unsigned char *lzvn_copy8(unsigned char *restrict dst, * @return \p q1 if output is full. In that case, output will be partially invalid. */ static inline unsigned char *emit_literal(const unsigned char *p, - unsigned char *q, unsigned char *q1, - size_t L) { - size_t x; - while (L > 15) { - x = L < 271 ? L : 271; - if (q + x + 10 >= q1) - goto OUT_FULL; - store2(q, 0xE0 + ((x - 16) << 8)); - q += 2; - L -= x; - q = lzvn_copy8(q, p, x); - p += x; - } - if (L > 0) { - if (q + L + 10 >= q1) - goto OUT_FULL; - *q++ = 0xE0 + L; // 1110LLLL - q = lzvn_copy8(q, p, L); - } - return q; + unsigned char *q, unsigned char *q1, + size_t L) +{ + size_t x; + while (L > 15) { + x = L < 271 ? 
L : 271; + if (q + x + 10 >= q1) + goto OUT_FULL; + store2(q, 0xE0 + ((x - 16) << 8)); + q += 2; + L -= x; + q = lzvn_copy8(q, p, x); + p += x; + } + if (L > 0) { + if (q + L + 10 >= q1) + goto OUT_FULL; + *q++ = 0xE0 + L; // 1110LLLL + q = lzvn_copy8(q, p, L); + } + return q; OUT_FULL: - return q1; + return q1; } /*! @abstract Emit (L,M,D) instructions. M>=3. @@ -93,125 +79,137 @@ static inline unsigned char *emit_literal(const unsigned char *p, * @return \p q1 if output is full. In that case, output will be partially invalid. */ static inline unsigned char *emit(const unsigned char *p, unsigned char *q, - unsigned char *q1, size_t L, size_t M, - size_t D, size_t D_prev) { - size_t x; - while (L > 15) { - x = L < 271 ? L : 271; - if (q + x + 10 >= q1) - goto OUT_FULL; - store2(q, 0xE0 + ((x - 16) << 8)); - q += 2; - L -= x; - q = lzvn_copy64(q, p, x); - p += x; - } - if (L > 3) { - if (q + L + 10 >= q1) - goto OUT_FULL; - *q++ = 0xE0 + L; // 1110LLLL - q = lzvn_copy64(q, p, L); - p += L; - L = 0; - } - x = M <= 10 - 2 * L ? M : 10 - 2 * L; // x = min(10-2*L,M) - M -= x; - x -= 3; // M = (x+3) + M' max value for x is 7-2*L - - // Here L<4 literals remaining, we read them here - uint32_t literal = load4(p); - // P is not accessed after this point - - // Relaxed capacity test covering all cases - if (q + 8 >= q1) - goto OUT_FULL; - - if (D == D_prev) { - if (L == 0) { - *q++ = 0xF0 + (x + 3); // XM! - } else { - *q++ = (L << 6) + (x << 3) + 6; // LLxxx110 - } - store4(q, literal); - q += L; - } else if (D < 2048 - 2 * 256) { - // Short dist D>>8 in 0..5 - *q++ = (D >> 8) + (L << 6) + (x << 3); // LLxxxDDD - *q++ = D & 0xFF; - store4(q, literal); - q += L; - } else if (D >= (1 << 14) || M == 0 || (x + 3) + M > 34) { - // Long dist - *q++ = (L << 6) + (x << 3) + 7; - store2(q, D); - q += 2; - store4(q, literal); - q += L; - } else { - // Medium distance - x += M; - M = 0; - *q++ = 0xA0 + (x >> 2) + (L << 3); - store2(q, D << 2 | (x & 3)); - q += 2; - store4(q, literal); - q += L; - } - - // Issue remaining match - while (M > 15) { - if (q + 2 >= q1) - goto OUT_FULL; - x = M < 271 ? M : 271; - store2(q, 0xf0 + ((x - 16) << 8)); - q += 2; - M -= x; - } - if (M > 0) { - if (q + 1 >= q1) - goto OUT_FULL; - *q++ = 0xF0 + M; // M = 0..15 - } - - return q; + unsigned char *q1, size_t L, size_t M, + size_t D, size_t D_prev) +{ + size_t x; + while (L > 15) { + x = L < 271 ? L : 271; + if (q + x + 10 >= q1) + goto OUT_FULL; + store2(q, 0xE0 + ((x - 16) << 8)); + q += 2; + L -= x; + q = lzvn_copy64(q, p, x); + p += x; + } + if (L > 3) { + if (q + L + 10 >= q1) + goto OUT_FULL; + *q++ = 0xE0 + L; // 1110LLLL + q = lzvn_copy64(q, p, L); + p += L; + L = 0; + } + x = M <= 10 - 2 * L ? M : 10 - 2 * L; // x = min(10-2*L,M) + M -= x; + x -= 3; // M = (x+3) + M' max value for x is 7-2*L + + // Here L<4 literals remaining, we read them here + uint32_t literal = load4(p); + // P is not accessed after this point + + // Relaxed capacity test covering all cases + if (q + 8 >= q1) + goto OUT_FULL; + + if (D == D_prev) { + if (L == 0) { + *q++ = 0xF0 + (x + 3); // XM! 
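			/* 1111MMMM ("small match") form: the low nibble holds the
			 * match length x + 3 (3..10), and the previous distance is
			 * reused, so no distance or literal bytes follow. */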
+ } else { + *q++ = (L << 6) + (x << 3) + 6; // LLxxx110 + } + store4(q, literal); + q += L; + } else if (D < 2048 - 2 * 256) { + // Short dist D>>8 in 0..5 + *q++ = (D >> 8) + (L << 6) + (x << 3); // LLxxxDDD + *q++ = D & 0xFF; + store4(q, literal); + q += L; + } else if (D >= (1 << 14) || M == 0 || (x + 3) + M > 34) { + // Long dist + *q++ = (L << 6) + (x << 3) + 7; + store2(q, D); + q += 2; + store4(q, literal); + q += L; + } else { + // Medium distance + x += M; + M = 0; + *q++ = 0xA0 + (x >> 2) + (L << 3); + store2(q, D << 2 | (x & 3)); + q += 2; + store4(q, literal); + q += L; + } + + // Issue remaining match + while (M > 15) { + if (q + 2 >= q1) + goto OUT_FULL; + x = M < 271 ? M : 271; + store2(q, 0xf0 + ((x - 16) << 8)); + q += 2; + M -= x; + } + if (M > 0) { + if (q + 1 >= q1) + goto OUT_FULL; + *q++ = 0xF0 + M; // M = 0..15 + } + + return q; OUT_FULL: - return q1; + return q1; } -// =============================================================== -// Conversions +/* =============================================================== + * Conversions + */ /*! @abstract Return 32-bit value to store for offset x. */ -static inline int32_t offset_to_s32(lzvn_offset x) { return (int32_t)x; } +static inline int32_t offset_to_s32(lzvn_offset x) +{ + return (int32_t)x; +} /*! @abstract Get offset from 32-bit stored value x. */ -static inline lzvn_offset offset_from_s32(int32_t x) { return (lzvn_offset)x; } +static inline lzvn_offset offset_from_s32(int32_t x) +{ + return (lzvn_offset)x; +} -// =============================================================== -// Hash and Matching +/* =============================================================== + * Hash and Matching + */ /*! @abstract Get hash in range \c [0,LZVN_ENCODE_HASH_VALUES-1] from 3 bytes in i. */ -static inline uint32_t hash3i(uint32_t i) { - i &= 0xffffff; // truncate to 24-bit input (slightly increases compression ratio) - uint32_t h = (i * (1 + (1 << 6) + (1 << 12))) >> 12; - return h & (LZVN_ENCODE_HASH_VALUES - 1); +static inline uint32_t hash3i(uint32_t i) +{ + i &= 0xffffff; // truncate to 24-bit input (slightly increases compression ratio) + uint32_t h = (i * (1 + (1 << 6) + (1 << 12))) >> 12; + return h & (LZVN_ENCODE_HASH_VALUES - 1); } /*! @abstract Return the number [0, 4] of zero bytes in \p x, starting from the * least significant byte. */ -static inline lzvn_offset trailing_zero_bytes(uint32_t x) { - return (x == 0) ? 4 : (__builtin_ctzl(x) >> 3); +static inline lzvn_offset trailing_zero_bytes(uint32_t x) +{ + return (x == 0) ? 4 : (__builtin_ctzl(x) >> 3); } /*! @abstract Return the number [0, 4] of matching chars between values at * \p src+i and \p src+j, starting from the least significant byte. * Assumes we can read 4 chars from each position. */ static inline lzvn_offset nmatch4(const unsigned char *src, lzvn_offset i, - lzvn_offset j) { - uint32_t vi = load4(src + i); - uint32_t vj = load4(src + j); - return trailing_zero_bytes(vi ^ vj); + lzvn_offset j) +{ + uint32_t vi = load4(src + i); + uint32_t vj = load4(src + j); + return trailing_zero_bytes(vi ^ vj); } /*! @abstract Check if l_begin, m_begin, m0_begin (m0_begin < m_begin) can be @@ -224,370 +222,396 @@ static inline lzvn_offset nmatch4(const unsigned char *src, lzvn_offset i, * otherwise return 0. * @note \p *match should be 0 before the call. 
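 * @note On success, \p match->K holds the match gain: the match length minus
 * the bytes needed to store the distance (2 if D < 0x600, 3 otherwise).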
*/ static inline int lzvn_find_match(const unsigned char *src, - lzvn_offset src_begin, - lzvn_offset src_end, lzvn_offset l_begin, - lzvn_offset m0_begin, lzvn_offset m_begin, - lzvn_match_info *match) { - lzvn_offset n = nmatch4(src, m_begin, m0_begin); - if (n < 3) - return 0; // no match - - lzvn_offset D = m_begin - m0_begin; // actual distance - if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE) - return 0; // distance out of range - - // Expand forward - lzvn_offset m_end = m_begin + n; - while (n == 4 && m_end + 4 < src_end) { - n = nmatch4(src, m_end, m_end - D); - m_end += n; - } - - // Expand backwards over literal - while (m0_begin > src_begin && m_begin > l_begin && - src[m_begin - 1] == src[m0_begin - 1]) { - m0_begin--; - m_begin--; - } - - // OK, we keep it, update MATCH - lzvn_offset M = m_end - m_begin; // match length - match->m_begin = m_begin; - match->m_end = m_end; - match->K = M - ((D < 0x600) ? 2 : 3); - match->M = M; - match->D = D; - - return 1; // OK + lzvn_offset src_begin, lzvn_offset src_end, + lzvn_offset l_begin, lzvn_offset m0_begin, + lzvn_offset m_begin, lzvn_match_info *match) +{ + lzvn_offset n = nmatch4(src, m_begin, m0_begin); + if (n < 3) + return 0; // no match + + lzvn_offset D = m_begin - m0_begin; // actual distance + if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE) + return 0; // distance out of range + + // Expand forward + lzvn_offset m_end = m_begin + n; + while (n == 4 && m_end + 4 < src_end) { + n = nmatch4(src, m_end, m_end - D); + m_end += n; + } + + // Expand backwards over literal + while (m0_begin > src_begin && m_begin > l_begin && + src[m_begin - 1] == src[m0_begin - 1]) { + m0_begin--; + m_begin--; + } + + // OK, we keep it, update MATCH + lzvn_offset M = m_end - m_begin; // match length + match->m_begin = m_begin; + match->m_end = m_end; + match->K = M - ((D < 0x600) ? 2 : 3); + match->M = M; + match->D = D; + + return 1; // OK } /*! @abstract Same as lzvn_find_match, but we already know that N bytes do * match (N<=4). */ static inline int lzvn_find_matchN(const unsigned char *src, - lzvn_offset src_begin, - lzvn_offset src_end, lzvn_offset l_begin, - lzvn_offset m0_begin, lzvn_offset m_begin, - lzvn_offset n, lzvn_match_info *match) { - // We can skip the first comparison on 4 bytes - if (n < 3) - return 0; // no match - - lzvn_offset D = m_begin - m0_begin; // actual distance - if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE) - return 0; // distance out of range - - // Expand forward - lzvn_offset m_end = m_begin + n; - while (n == 4 && m_end + 4 < src_end) { - n = nmatch4(src, m_end, m_end - D); - m_end += n; - } - - // Expand backwards over literal - while (m0_begin > src_begin && m_begin > l_begin && - src[m_begin - 1] == src[m0_begin - 1]) { - m0_begin--; - m_begin--; - } - - // OK, we keep it, update MATCH - lzvn_offset M = m_end - m_begin; // match length - match->m_begin = m_begin; - match->m_end = m_end; - match->K = M - ((D < 0x600) ? 
2 : 3); - match->M = M; - match->D = D; - - return 1; // OK + lzvn_offset src_begin, lzvn_offset src_end, + lzvn_offset l_begin, lzvn_offset m0_begin, + lzvn_offset m_begin, lzvn_offset n, + lzvn_match_info *match) +{ + // We can skip the first comparison on 4 bytes + if (n < 3) + return 0; // no match + + lzvn_offset D = m_begin - m0_begin; // actual distance + if (D <= 0 || D > LZVN_ENCODE_MAX_DISTANCE) + return 0; // distance out of range + + // Expand forward + lzvn_offset m_end = m_begin + n; + while (n == 4 && m_end + 4 < src_end) { + n = nmatch4(src, m_end, m_end - D); + m_end += n; + } + + // Expand backwards over literal + while (m0_begin > src_begin && m_begin > l_begin && + src[m_begin - 1] == src[m0_begin - 1]) { + m0_begin--; + m_begin--; + } + + // OK, we keep it, update MATCH + lzvn_offset M = m_end - m_begin; // match length + match->m_begin = m_begin; + match->m_end = m_end; + match->K = M - ((D < 0x600) ? 2 : 3); + match->M = M; + match->D = D; + + return 1; // OK } -// =============================================================== -// Encoder Backend +/* =============================================================== + * Encoder Backend + */ /*! @abstract Emit a match and update state. * @return number of bytes written to \p dst. May be 0 if there is no more space * in \p dst to emit the match. */ static inline lzvn_offset lzvn_emit_match(lzvn_encoder_state *state, - lzvn_match_info match) { - size_t L = (size_t)(match.m_begin - state->src_literal); // literal count - size_t M = (size_t)match.M; // match length - size_t D = (size_t)match.D; // match distance - size_t D_prev = (size_t)state->d_prev; // previously emitted match distance - unsigned char *dst = emit(state->src + state->src_literal, state->dst, - state->dst_end, L, M, D, D_prev); - // Check if DST is full - if (dst >= state->dst_end) { - return 0; // FULL - } - - // Update state - lzvn_offset dst_used = dst - state->dst; - state->d_prev = match.D; - state->dst = dst; - state->src_literal = match.m_end; - return dst_used; + lzvn_match_info match) +{ + size_t L = + (size_t)(match.m_begin - state->src_literal); // literal count + size_t M = (size_t)match.M; // match length + size_t D = (size_t)match.D; // match distance + size_t D_prev = + (size_t)state->d_prev; // previously emitted match distance + unsigned char *dst = emit(state->src + state->src_literal, state->dst, + state->dst_end, L, M, D, D_prev); + // Check if DST is full + if (dst >= state->dst_end) { + return 0; // FULL + } + + // Update state + lzvn_offset dst_used = dst - state->dst; + state->d_prev = match.D; + state->dst = dst; + state->src_literal = match.m_end; + return dst_used; } /*! @abstract Emit a n-bytes literal and update state. * @return number of bytes written to \p dst. May be 0 if there is no more space * in \p dst to emit the literal. 
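 * @note On success, the encoder's src_literal cursor advances by \p n and dst
 * advances by the returned byte count.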
*/ static inline lzvn_offset lzvn_emit_literal(lzvn_encoder_state *state, - lzvn_offset n) { - size_t L = (size_t)n; - unsigned char *dst = emit_literal(state->src + state->src_literal, state->dst, - state->dst_end, L); - // Check if DST is full - if (dst >= state->dst_end) - return 0; // FULL - - // Update state - lzvn_offset dst_used = dst - state->dst; - state->dst = dst; - state->src_literal += n; - return dst_used; + lzvn_offset n) +{ + size_t L = (size_t)n; + unsigned char *dst = emit_literal(state->src + state->src_literal, + state->dst, state->dst_end, L); + // Check if DST is full + if (dst >= state->dst_end) + return 0; // FULL + + // Update state + lzvn_offset dst_used = dst - state->dst; + state->dst = dst; + state->src_literal += n; + return dst_used; } /*! @abstract Emit end-of-stream and update state. * @return number of bytes written to \p dst. May be 0 if there is no more space * in \p dst to emit the instruction. */ -static inline lzvn_offset lzvn_emit_end_of_stream(lzvn_encoder_state *state) { - // Do we have 8 byte in dst? - if (state->dst_end < state->dst + 8) - return 0; // FULL - - // Insert end marker and update state - store8(state->dst, 0x06); // end-of-stream command - state->dst += 8; - return 8; // dst_used +static inline lzvn_offset lzvn_emit_end_of_stream(lzvn_encoder_state *state) +{ + // Do we have 8 byte in dst? + if (state->dst_end < state->dst + 8) + return 0; // FULL + + // Insert end marker and update state + store8(state->dst, 0x06); // end-of-stream command + state->dst += 8; + return 8; // dst_used } -// =============================================================== -// Encoder Functions +/* =============================================================== + * Encoder Functions + */ /*! @abstract Initialize encoder table in \p state, uses current I/O parameters. 
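 * @note Every hash bucket is seeded with the earliest readable source position
 * (-LZVN_ENCODE_MAX_DISTANCE, clamped to src_begin), so lookups in a fresh
 * table never reference bytes outside the valid source range.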
*/ -static inline void lzvn_init_table(lzvn_encoder_state *state) { - lzvn_offset index = -LZVN_ENCODE_MAX_DISTANCE; // max match distance - if (index < state->src_begin) - index = state->src_begin; - uint32_t value = load4(state->src + index); - - lzvn_encode_entry_type e; - for (int i = 0; i < 4; i++) { - e.indices[i] = offset_to_s32(index); - e.values[i] = value; - } - for (int u = 0; u < LZVN_ENCODE_HASH_VALUES; u++) - state->table[u] = e; // fill entire table +static inline void lzvn_init_table(lzvn_encoder_state *state) +{ + lzvn_offset index = -LZVN_ENCODE_MAX_DISTANCE; // max match distance + if (index < state->src_begin) + index = state->src_begin; + uint32_t value = load4(state->src + index); + + lzvn_encode_entry_type e; + for (int i = 0; i < 4; i++) { + e.indices[i] = offset_to_s32(index); + e.values[i] = value; + } + for (int u = 0; u < LZVN_ENCODE_HASH_VALUES; u++) + state->table[u] = e; // fill entire table } -void lzvn_encode(lzvn_encoder_state *state) { - const lzvn_match_info NO_MATCH = {0}; - - for (; state->src_current < state->src_current_end; state->src_current++) { - // Get 4 bytes at src_current - uint32_t vi = load4(state->src + state->src_current); - - // Compute new hash H at position I, and push value into position table - int h = hash3i(vi); // index of first entry - - // Read table entries for H - lzvn_encode_entry_type e = state->table[h]; - - // Update entry with index=current and value=vi - lzvn_encode_entry_type updated_e; // rotate values, so we will replace the oldest - updated_e.indices[0] = offset_to_s32(state->src_current); - updated_e.indices[1] = e.indices[0]; - updated_e.indices[2] = e.indices[1]; - updated_e.indices[3] = e.indices[2]; - updated_e.values[0] = vi; - updated_e.values[1] = e.values[0]; - updated_e.values[2] = e.values[1]; - updated_e.values[3] = e.values[2]; - - // Do not check matches if still in previously emitted match - if (state->src_current < state->src_literal) - goto after_emit; - -// Update best with candidate if better -#define UPDATE(best, candidate) \ - do { \ - if (candidate.K > best.K || \ - ((candidate.K == best.K) && (candidate.m_end > best.m_end + 1))) { \ - best = candidate; \ - } \ - } while (0) -// Check candidate. Keep if better. -#define CHECK_CANDIDATE(ik, nk) \ - do { \ - lzvn_match_info m1; \ - if (lzvn_find_matchN(state->src, state->src_begin, state->src_end, \ - state->src_literal, ik, state->src_current, nk, &m1)) { \ - UPDATE(incoming, m1); \ - } \ - } while (0) -// Emit match M. Return if we don't have enough space in the destination buffer -#define EMIT_MATCH(m) \ - do { \ - if (lzvn_emit_match(state, m) == 0) \ - return; \ - } while (0) -// Emit literal of length L. 
Return if we don't have enough space in the -// destination buffer -#define EMIT_LITERAL(l) \ - do { \ - if (lzvn_emit_literal(state, l) == 0) \ - return; \ - } while (0) - - lzvn_match_info incoming = NO_MATCH; - - // Check candidates in order (closest first) - uint32_t diffs[4]; - for (int k = 0; k < 4; k++) - diffs[k] = e.values[k] ^ vi; // XOR, 0 if equal - lzvn_offset ik; // index - lzvn_offset nk; // match byte count - - // The values stored in e.xyzw are 32-bit signed indices, extended to signed - // type lzvn_offset - ik = offset_from_s32(e.indices[0]); - nk = trailing_zero_bytes(diffs[0]); - CHECK_CANDIDATE(ik, nk); - ik = offset_from_s32(e.indices[1]); - nk = trailing_zero_bytes(diffs[1]); - CHECK_CANDIDATE(ik, nk); - ik = offset_from_s32(e.indices[2]); - nk = trailing_zero_bytes(diffs[2]); - CHECK_CANDIDATE(ik, nk); - ik = offset_from_s32(e.indices[3]); - nk = trailing_zero_bytes(diffs[3]); - CHECK_CANDIDATE(ik, nk); - - // Check candidate at previous distance - if (state->d_prev != 0) { - lzvn_match_info m1; - if (lzvn_find_match(state->src, state->src_begin, state->src_end, - state->src_literal, state->src_current - state->d_prev, - state->src_current, &m1)) { - m1.K = m1.M - 1; // fix K for D_prev - UPDATE(incoming, m1); - } - } - - // Here we have the best candidate in incoming, may be NO_MATCH - - // If no incoming match, and literal backlog becomes too high, emit pending - // match, or literals if there is no pending match - if (incoming.M == 0) { - if (state->src_current - state->src_literal >= - LZVN_ENCODE_MAX_LITERAL_BACKLOG) // at this point, we always have - // current >= literal - { - if (state->pending.M != 0) { - EMIT_MATCH(state->pending); - state->pending = NO_MATCH; - } else { - EMIT_LITERAL(271); // emit long literal (271 is the longest literal size we allow) - } - } - goto after_emit; - } - - if (state->pending.M == 0) { - // NOTE. Here, we can also emit incoming right away. It will make the - // encoder 1.5x faster, at a cost of ~10% lower compression ratio: - // EMIT_MATCH(incoming); - // state->pending = NO_MATCH; - - // No pending match, emit nothing, keep incoming - state->pending = incoming; - } else { - // Here we have both incoming and pending - if (state->pending.m_end <= incoming.m_begin) { - // No overlap: emit pending, keep incoming - EMIT_MATCH(state->pending); - state->pending = incoming; - } else { - // If pending is better, emit pending and discard incoming. - // Otherwise, emit incoming and discard pending. - if (incoming.K > state->pending.K) - state->pending = incoming; - EMIT_MATCH(state->pending); - state->pending = NO_MATCH; - } - } - - after_emit: - - // We commit state changes only after we tried to emit instructions, so we - // can restart in the same state in case dst was full and we quit the loop. - state->table[h] = updated_e; - - } // i loop - - // Do not emit pending match here. We do it only at the end of stream. 
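/* The loop in lzvn_encode() below looks up candidates with hash3i(), where
 * only the low 3 bytes of the word loaded at src_current feed the bucket
 * index, and scores each candidate with trailing_zero_bytes() applied to the
 * XOR of the two 4-byte words.  A standalone sketch of those two helpers
 * (illustration only, not part of the patch; HASH_BITS mirrors
 * LZVN_ENCODE_HASH_BITS, and the ctz builtin assumes GCC/Clang): */

#include <stdint.h>
#include <stdio.h>

#define HASH_BITS 14                 /* LZVN_ENCODE_HASH_BITS */
#define HASH_VALUES (1 << HASH_BITS) /* LZVN_ENCODE_HASH_VALUES */

/* Same multiplicative hash as hash3i() in this file. */
static uint32_t hash3i(uint32_t i)
{
	i &= 0xffffff; /* only the first 3 bytes contribute */
	uint32_t h = (i * (1 + (1 << 6) + (1 << 12))) >> 12;
	return h & (HASH_VALUES - 1);
}

/* Number [0, 4] of equal low-order bytes between two 32-bit words,
 * as in trailing_zero_bytes()/nmatch4(). */
static int matching_bytes(uint32_t a, uint32_t b)
{
	uint32_t x = a ^ b;
	return x == 0 ? 4 : __builtin_ctz(x) >> 3;
}

int main(void)
{
	uint32_t a = 0x58636261; /* "abcX", little-endian */
	uint32_t b = 0x59636261; /* "abcY", little-endian */

	/* Same bucket (the 4th byte is ignored by the hash), and the XOR
	 * trick reports a 3-byte match, enough for a candidate (n >= 3). */
	printf("bucket %u vs %u, match %d bytes\n",
	       (unsigned)hash3i(a), (unsigned)hash3i(b), matching_bytes(a, b));
	return 0;
}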
+void lzvn_encode(lzvn_encoder_state *state) +{ + const lzvn_match_info NO_MATCH = { 0 }; + + for (; state->src_current < state->src_current_end; + state->src_current++) { + // Get 4 bytes at src_current + uint32_t vi = load4(state->src + state->src_current); + + // Compute new hash H at position I, and push value into position table + int h = hash3i(vi); // index of first entry + + // Read table entries for H + lzvn_encode_entry_type e = state->table[h]; + + // Update entry with index=current and value=vi + lzvn_encode_entry_type + updated_e; // rotate values, so we will replace the oldest + updated_e.indices[0] = offset_to_s32(state->src_current); + updated_e.indices[1] = e.indices[0]; + updated_e.indices[2] = e.indices[1]; + updated_e.indices[3] = e.indices[2]; + updated_e.values[0] = vi; + updated_e.values[1] = e.values[0]; + updated_e.values[2] = e.values[1]; + updated_e.values[3] = e.values[2]; + + // Do not check matches if still in previously emitted match + if (state->src_current < state->src_literal) + goto after_emit; + +/* Update best with candidate if better + */ +#define UPDATE(best, candidate) \ + do { \ + if (candidate.K > best.K || \ + ((candidate.K == best.K) && \ + (candidate.m_end > best.m_end + 1))) { \ + best = candidate; \ + } \ + } while (0) +/* Check candidate. Keep if better. + */ +#define CHECK_CANDIDATE(ik, nk) \ + do { \ + lzvn_match_info m1; \ + if (lzvn_find_matchN(state->src, state->src_begin, \ + state->src_end, state->src_literal, ik, \ + state->src_current, nk, &m1)) { \ + UPDATE(incoming, m1); \ + } \ + } while (0) +/* Emit match M. Return if we don't have enough space in the destination buffer + */ +#define EMIT_MATCH(m) \ + do { \ + if (lzvn_emit_match(state, m) == 0) \ + return; \ + } while (0) +/* Emit literal of length L. 
Return if we don't have enough space in the + * destination buffer + */ +#define EMIT_LITERAL(l) \ + do { \ + if (lzvn_emit_literal(state, l) == 0) \ + return; \ + } while (0) + + lzvn_match_info incoming = NO_MATCH; + + // Check candidates in order (closest first) + uint32_t diffs[4]; + for (int k = 0; k < 4; k++) + diffs[k] = e.values[k] ^ vi; // XOR, 0 if equal + lzvn_offset ik; // index + lzvn_offset nk; // match byte count + + // The values stored in e.xyzw are 32-bit signed indices, extended to signed + // type lzvn_offset + ik = offset_from_s32(e.indices[0]); + nk = trailing_zero_bytes(diffs[0]); + CHECK_CANDIDATE(ik, nk); + ik = offset_from_s32(e.indices[1]); + nk = trailing_zero_bytes(diffs[1]); + CHECK_CANDIDATE(ik, nk); + ik = offset_from_s32(e.indices[2]); + nk = trailing_zero_bytes(diffs[2]); + CHECK_CANDIDATE(ik, nk); + ik = offset_from_s32(e.indices[3]); + nk = trailing_zero_bytes(diffs[3]); + CHECK_CANDIDATE(ik, nk); + + // Check candidate at previous distance + if (state->d_prev != 0) { + lzvn_match_info m1; + if (lzvn_find_match(state->src, state->src_begin, + state->src_end, state->src_literal, + state->src_current - state->d_prev, + state->src_current, &m1)) { + m1.K = m1.M - 1; // fix K for D_prev + UPDATE(incoming, m1); + } + } + + // Here we have the best candidate in incoming, may be NO_MATCH + + // If no incoming match, and literal backlog becomes too high, emit pending + // match, or literals if there is no pending match + if (incoming.M == 0) { + if (state->src_current - state->src_literal >= + LZVN_ENCODE_MAX_LITERAL_BACKLOG) // at this point, we always have + // current >= literal + { + if (state->pending.M != 0) { + EMIT_MATCH(state->pending); + state->pending = NO_MATCH; + } else { + EMIT_LITERAL( + 271); // emit long literal (271 is the longest literal size we allow) + } + } + goto after_emit; + } + + if (state->pending.M == 0) { + // NOTE. Here, we can also emit incoming right away. It will make the + // encoder 1.5x faster, at a cost of ~10% lower compression ratio: + // EMIT_MATCH(incoming); + // state->pending = NO_MATCH; + + // No pending match, emit nothing, keep incoming + state->pending = incoming; + } else { + // Here we have both incoming and pending + if (state->pending.m_end <= incoming.m_begin) { + // No overlap: emit pending, keep incoming + EMIT_MATCH(state->pending); + state->pending = incoming; + } else { + // If pending is better, emit pending and discard incoming. + // Otherwise, emit incoming and discard pending. + if (incoming.K > state->pending.K) + state->pending = incoming; + EMIT_MATCH(state->pending); + state->pending = NO_MATCH; + } + } + +after_emit: + + // We commit state changes only after we tried to emit instructions, so we + // can restart in the same state in case dst was full and we quit the loop. + state->table[h] = updated_e; + + } // i loop + + // Do not emit pending match here. We do it only at the end of stream. } -// =============================================================== -// API entry points +/* =============================================================== + * API entry points + */ -size_t lzvn_encode_scratch_size(void) { return LZVN_ENCODE_WORK_SIZE; } +size_t lzvn_encode_scratch_size(void) +{ + return LZVN_ENCODE_WORK_SIZE; +} static size_t lzvn_encode_partial(void *__restrict dst, size_t dst_size, - const void *__restrict src, size_t src_size, - size_t *src_used, void *__restrict work) { - // Min size checks to avoid accessing memory outside buffers. 
- if (dst_size < LZVN_ENCODE_MIN_DST_SIZE) { - *src_used = 0; - return 0; - } - // Max input size check (limit to offsets on uint32_t). - if (src_size > LZVN_ENCODE_MAX_SRC_SIZE) { - src_size = LZVN_ENCODE_MAX_SRC_SIZE; - } - - // Setup encoder state - lzvn_encoder_state state; - memset(&state, 0, sizeof(state)); - - state.src = src; - state.src_begin = 0; - state.src_end = (lzvn_offset)src_size; - state.src_literal = 0; - state.src_current = 0; - state.dst = dst; - state.dst_begin = dst; - state.dst_end = (unsigned char *)dst + dst_size - 8; // reserve 8 bytes for end-of-stream - state.table = work; - - // Do not encode if the input buffer is too small. We'll emit a literal instead. - if (src_size >= LZVN_ENCODE_MIN_SRC_SIZE) { - - state.src_current_end = (lzvn_offset)src_size - LZVN_ENCODE_MIN_MARGIN; - lzvn_init_table(&state); - lzvn_encode(&state); - - } - - // No need to test the return value: src_literal will not be updated on failure, - // and we will fail later. - lzvn_emit_literal(&state, state.src_end - state.src_literal); - - // Restore original size, so end-of-stream always succeeds, and emit it - state.dst_end = (unsigned char *)dst + dst_size; - lzvn_emit_end_of_stream(&state); - - *src_used = state.src_literal; - return (size_t)(state.dst - state.dst_begin); + const void *__restrict src, size_t src_size, + size_t *src_used, void *__restrict work) +{ + // Min size checks to avoid accessing memory outside buffers. + if (dst_size < LZVN_ENCODE_MIN_DST_SIZE) { + *src_used = 0; + return 0; + } + // Max input size check (limit to offsets on uint32_t). + if (src_size > LZVN_ENCODE_MAX_SRC_SIZE) { + src_size = LZVN_ENCODE_MAX_SRC_SIZE; + } + + // Setup encoder state + lzvn_encoder_state state; + memset(&state, 0, sizeof(state)); + + state.src = src; + state.src_begin = 0; + state.src_end = (lzvn_offset)src_size; + state.src_literal = 0; + state.src_current = 0; + state.dst = dst; + state.dst_begin = dst; + state.dst_end = (unsigned char *)dst + dst_size - + 8; // reserve 8 bytes for end-of-stream + state.table = work; + + // Do not encode if the input buffer is too small. We'll emit a literal instead. + if (src_size >= LZVN_ENCODE_MIN_SRC_SIZE) { + state.src_current_end = + (lzvn_offset)src_size - LZVN_ENCODE_MIN_MARGIN; + lzvn_init_table(&state); + lzvn_encode(&state); + } + + // No need to test the return value: src_literal will not be updated on failure, + // and we will fail later. 
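	// Flush everything not yet covered by an emitted match (possibly the
	// whole input, if it was too small to be encoded above) as one final
	// literal run.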
+ lzvn_emit_literal(&state, state.src_end - state.src_literal); + + // Restore original size, so end-of-stream always succeeds, and emit it + state.dst_end = (unsigned char *)dst + dst_size; + lzvn_emit_end_of_stream(&state); + + *src_used = state.src_literal; + return (size_t)(state.dst - state.dst_begin); } size_t lzvn_encode_buffer(void *__restrict dst, size_t dst_size, - const void *__restrict src, size_t src_size, - void *__restrict work) { - size_t src_used = 0; - size_t dst_used = - lzvn_encode_partial(dst, dst_size, src, src_size, &src_used, work); - if (src_used != src_size) - return 0; // could not encode entire input stream = fail - return dst_used; // return encoded size + const void *__restrict src, size_t src_size, + void *__restrict work) +{ + size_t src_used = 0; + size_t dst_used = lzvn_encode_partial(dst, dst_size, src, src_size, + &src_used, work); + if (src_used != src_size) + return 0; // could not encode entire input stream = fail + return dst_used; // return encoded size } diff --git a/lzfse/lzvn_encode_base.h b/lzfse/lzvn_encode_base.h index 308cd4f..c654571 100644 --- a/lzfse/lzvn_encode_base.h +++ b/lzfse/lzvn_encode_base.h @@ -1,110 +1,99 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ /* -Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * Copyright (c) 2015-2016, Apple Inc. All rights reserved. + * + */ -Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE -COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-*/ - -// LZVN low-level encoder +/* LZVN low-level encoder + */ #ifndef LZVN_ENCODE_BASE_H #define LZVN_ENCODE_BASE_H #include "lzfse_internal.h" -// =============================================================== -// Types and Constants - -#define LZVN_ENCODE_HASH_BITS \ - 14 // number of bits returned by the hash function [10, 16] -#define LZVN_ENCODE_OFFSETS_PER_HASH \ - 4 // stored offsets stack for each hash value, MUST be 4 -#define LZVN_ENCODE_HASH_VALUES \ - (1 << LZVN_ENCODE_HASH_BITS) // number of entries in hash table -#define LZVN_ENCODE_MAX_DISTANCE \ - 0xffff // max match distance we can represent with LZVN encoding, MUST be - // 0xFFFF -#define LZVN_ENCODE_MIN_MARGIN \ - 8 // min number of bytes required between current and end during encoding, - // MUST be >= 8 -#define LZVN_ENCODE_MAX_LITERAL_BACKLOG \ - 400 // if the number of pending literals exceeds this size, emit a long - // literal, MUST be >= 271 +/* =============================================================== + * Types and Constants + */ + +#define LZVN_ENCODE_HASH_BITS \ + 14 // number of bits returned by the hash function [10, 16] +#define LZVN_ENCODE_OFFSETS_PER_HASH \ + 4 // stored offsets stack for each hash value, MUST be 4 +#define LZVN_ENCODE_HASH_VALUES \ + (1 << LZVN_ENCODE_HASH_BITS) // number of entries in hash table +#define LZVN_ENCODE_MAX_DISTANCE \ + 0xffff // max match distance we can represent with LZVN encoding, MUST be \ + // 0xFFFF +#define LZVN_ENCODE_MIN_MARGIN \ + 8 // min number of bytes required between current and end during encoding, \ + // MUST be >= 8 +#define LZVN_ENCODE_MAX_LITERAL_BACKLOG \ + 400 // if the number of pending literals exceeds this size, emit a long \ + // literal, MUST be >= 271 /*! @abstract Type of table entry. */ typedef struct { - int32_t indices[4]; // signed indices in source buffer - uint32_t values[4]; // corresponding 32-bit values + int32_t indices[4]; // signed indices in source buffer + uint32_t values[4]; // corresponding 32-bit values } lzvn_encode_entry_type; -// Work size -#define LZVN_ENCODE_WORK_SIZE \ - (LZVN_ENCODE_HASH_VALUES * sizeof(lzvn_encode_entry_type)) +/* Work size + */ +#define LZVN_ENCODE_WORK_SIZE \ + (LZVN_ENCODE_HASH_VALUES * sizeof(lzvn_encode_entry_type)) /*! @abstract Match */ typedef struct { - lzvn_offset m_begin; // beginning of match, current position - lzvn_offset m_end; // end of match, this is where the next literal would begin - // if we emit the entire match - lzvn_offset M; // match length M: m_end - m_begin - lzvn_offset D; // match distance D - lzvn_offset K; // match gain: M - distance storage (L not included) + lzvn_offset m_begin; // beginning of match, current position + lzvn_offset + m_end; // end of match, this is where the next literal would begin + // if we emit the entire match + lzvn_offset M; // match length M: m_end - m_begin + lzvn_offset D; // match distance D + lzvn_offset K; // match gain: M - distance storage (L not included) } lzvn_match_info; -// =============================================================== -// Internal encoder state +/* =============================================================== + * Internal encoder state + */ /*! @abstract Base encoder state and I/O. */ typedef struct { - - // Encoder I/O - - // Source buffer - const unsigned char *src; - // Valid range in source buffer: we can access src[i] for src_begin <= i < - // src_end. src_begin may be negative. 
- lzvn_offset src_begin; - lzvn_offset src_end; - // Next byte to process in source buffer - lzvn_offset src_current; - // Next byte after the last byte to process in source buffer. We MUST have: - // src_current_end + 8 <= src_end. - lzvn_offset src_current_end; - // Next byte to encode in source buffer, may be before or after src_current. - lzvn_offset src_literal; - - // Next byte to write in destination buffer - unsigned char *dst; - // Valid range in destination buffer: [dst_begin, dst_end - 1] - unsigned char *dst_begin; - unsigned char *dst_end; - - // Encoder state - - // Pending match - lzvn_match_info pending; - - // Distance for last emitted match, or 0 - lzvn_offset d_prev; - - // Hash table used to find matches. Stores LZVN_ENCODE_OFFSETS_PER_HASH 32-bit - // signed indices in the source buffer, and the corresponding 4-byte values. - // The number of entries in the table is LZVN_ENCODE_HASH_VALUES. - lzvn_encode_entry_type *table; + // Encoder I/O + + // Source buffer + const unsigned char *src; + // Valid range in source buffer: we can access src[i] for src_begin <= i < + // src_end. src_begin may be negative. + lzvn_offset src_begin; + lzvn_offset src_end; + // Next byte to process in source buffer + lzvn_offset src_current; + // Next byte after the last byte to process in source buffer. We MUST have: + // src_current_end + 8 <= src_end. + lzvn_offset src_current_end; + // Next byte to encode in source buffer, may be before or after src_current. + lzvn_offset src_literal; + + // Next byte to write in destination buffer + unsigned char *dst; + // Valid range in destination buffer: [dst_begin, dst_end - 1] + unsigned char *dst_begin; + unsigned char *dst_end; + + // Encoder state + + // Pending match + lzvn_match_info pending; + + // Distance for last emitted match, or 0 + lzvn_offset d_prev; + + // Hash table used to find matches. Stores LZVN_ENCODE_OFFSETS_PER_HASH 32-bit + // signed indices in the source buffer, and the corresponding 4-byte values. + // The number of entries in the table is LZVN_ENCODE_HASH_VALUES. + lzvn_encode_entry_type *table; } lzvn_encoder_state;
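/* For reference, a minimal caller of the two entry points defined in
 * lzvn_encode_base.c above (illustration only, not part of the patch;
 * declaring the prototypes locally and the 64-byte output headroom are
 * illustrative choices, not values taken from the library): */

#include <stdio.h>
#include <stdlib.h>

size_t lzvn_encode_scratch_size(void);
size_t lzvn_encode_buffer(void *dst, size_t dst_size,
			  const void *src, size_t src_size, void *work);

int main(void)
{
	const char src[] = "LZVN needs a caller-provided scratch buffer of "
			   "lzvn_encode_scratch_size() bytes.";
	size_t src_size = sizeof src - 1;
	size_t dst_size = src_size + 64; /* headroom: tiny inputs may expand */
	unsigned char *dst = malloc(dst_size);
	void *work = malloc(lzvn_encode_scratch_size());

	if (!dst || !work)
		return 1;

	size_t out = lzvn_encode_buffer(dst, dst_size, src, src_size, work);
	if (out == 0) /* 0 means the whole input could not be encoded */
		printf("input did not fit in dst\n");
	else
		printf("encoded %zu -> %zu bytes\n", src_size, out);

	free(dst);
	free(work);
	return 0;
}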