|
| 1 | +#include "stdafx.h" |
| 2 | +#include "Dictionary.h" |
| 3 | + |
| 4 | +// Implementation designed for being extremely fast at the expense of memory |
| 5 | +// usage. The base memory usage is 512 KB (or 768 KB on 64-bit systems). More |
| 6 | +// memory is always allocated but only as much as needed. Larger sized chunks |
| 7 | +// will consume more memory. For a series of 4 KB chunks, the extra consumed |
| 8 | +// memory is around 20-80 KB. For a series of 64 KB chunks, it is 200-800 KB. |
| 9 | + |
| 10 | +// This implementation is ~30x faster than the 576 KB fixed-size Dictionary! |
| 11 | + |
| 12 | +#ifdef __cplusplus_cli |
| 13 | +#pragma unmanaged |
| 14 | +#endif |
| 15 | + |
| 16 | +#pragma optimize("t", on) |
| 17 | + |
#define MAX_BYTE 0x100 // number of distinct byte values (0x00-0xFF, i.e. max value +1 for 0); used as the table dimension
#define MIN(a, b) (((a) < (b)) ? (a) : (b)) // minimum of 2 values; NOTE: evaluates its arguments more than once, so avoid side effects
| 20 | + |
| 21 | +// An entry within the dictionary, using a dynamically resized array of positions |
| 22 | +typedef struct _Entry // 8+ bytes (12+ bytes on 64-bit systems) |
| 23 | +{ |
| 24 | + const_bytes* pos; |
| 25 | + uint16_t size, cap; |
| 26 | +} Entry; |
| 27 | + |
// The dictionary: a MAX_BYTE x MAX_BYTE table of entries, indexed by two byte values.
// The add/find routines in this file index it with the first two bytes of the data.
// NOTE(review): the 64-bit figure below assumes a 12-byte Entry; with a naturally
// aligned 8-byte pointer an Entry would be 16 bytes (1 MB table) - confirm packing.
struct _Dictionary // 512+ KB (768+ KB on 64-bit systems)
{
	Entry entries[MAX_BYTE][MAX_BYTE];
};
| 33 | + |
| 34 | +// Creates and returns an uninitialized dictionary struct |
| 35 | +Dictionary* Dictionary_Create() |
| 36 | +{ |
| 37 | + Dictionary* d = (Dictionary*)malloc(sizeof(Dictionary)); |
| 38 | + if (d) memset(d, 0, sizeof(Dictionary)); // need to set pos and cap to 0, might as well set size to 0 |
| 39 | +#ifdef PRINT_ERRORS |
| 40 | + else PRINT_ERROR("Dictionary Creation Error: malloc failed\n"); |
| 41 | +#endif |
| 42 | + return d; |
| 43 | +} |
| 44 | + |
| 45 | +// Destroys a dictionary struct |
| 46 | +void Dictionary_Destroy(Dictionary* d) |
| 47 | +{ |
| 48 | + uint_fast16_t i, j; |
| 49 | + for (i = 0; i < MAX_BYTE; ++i) |
| 50 | + for (j = 0; j < MAX_BYTE; ++j) |
| 51 | + free(d->entries[i][j].pos); |
| 52 | + free(d); |
| 53 | +} |
| 54 | + |
| 55 | +// Resets a dictionary struct ready to start a new chunk |
| 56 | +// This should also be called after Dictionary_Create and before any Dictionary_Add/Dictionary_Find |
| 57 | +// Returns true on success, false on error (and sets errno) |
| 58 | +bool Dictionary_Reset(Dictionary* d) |
| 59 | +{ |
| 60 | + uint_fast16_t i, j; |
| 61 | + for (i = 0; i < MAX_BYTE; ++i) |
| 62 | + for (j = 0; j < MAX_BYTE; ++j) |
| 63 | + d->entries[i][j].size = 0; |
| 64 | + return true; |
| 65 | +} |
| 66 | + |
| 67 | +// Adds data to the dictionary, which will be used as a starting point during future finds |
| 68 | +// Max length is how many bytes can be read from data, regardless of the end of the chunk |
| 69 | +// Returns true on success, false on error |
| 70 | +bool Dictionary_Add(Dictionary* d, const_bytes data, const size_t max_len) |
| 71 | +{ |
| 72 | + if (max_len >= 2) |
| 73 | + { |
| 74 | + const byte x = data[0], y = data[1]; |
| 75 | + Entry* e = d->entries[x]+y; |
| 76 | + if (e->size >= e->cap) |
| 77 | + { |
| 78 | + const_bytes *temp = (const_bytes*)realloc(e->pos, (e->cap=(e->cap?((e->cap==0x8000)?0xFFFF:(e->cap<<1)):8))*sizeof(const_bytes)); |
| 79 | + if (temp == NULL) |
| 80 | + { |
| 81 | + PRINT_ERROR("Dictionary Add Error: realloc failed\n"); |
| 82 | + Dictionary_Destroy(d); |
| 83 | + return false; |
| 84 | + } |
| 85 | + e->pos = temp; |
| 86 | + } |
| 87 | + e->pos[e->size++] = data; |
| 88 | + } |
| 89 | + return true; |
| 90 | +} |
| 91 | + |
| 92 | +// Finds the best symbol in the dictionary for the data at u[pos] |
| 93 | +// Returns the length of the string found, or 0 if nothing of length >= 3 was found |
| 94 | +// offset is set to the offset from the current position to the string |
| 95 | +uint_fast16_t Dictionary_Find(const Dictionary* d, const Dictionary* d2, const_bytes data, const uint_fast16_t max_len, const_bytes search, uint_fast16_t* offset) |
| 96 | +{ |
| 97 | + static const Entry DummyEntry = { NULL, 0, 0 }; // using this instead of NULL reduces a lot of checks |
| 98 | + |
| 99 | + if (max_len >= 3 && data-search > 0) |
| 100 | + { |
| 101 | + const byte x = data[0], y = data[1]; |
| 102 | + const Entry* e = d->entries[x]+y, *e2 = d2 ? d2->entries[x]+y : &DummyEntry; |
| 103 | + if (e->size || e2->size) // a match is possible |
| 104 | + { |
| 105 | + const byte z = data[2]; |
| 106 | + uint_fast16_t l = 0, o; |
| 107 | + int_fast32_t ep = e->size - 1; // need to support all uint16 values and <0 |
| 108 | + |
| 109 | + // Try short repeats - this does not use the Dictionary at all |
| 110 | + if (x == z && y == data[-1]) |
| 111 | + { |
| 112 | + if (x == y) // x == y == z == data[-1] |
| 113 | + { |
| 114 | + // Repeating the last byte |
| 115 | + o = 1; |
| 116 | + l = 3; |
| 117 | + while (l < max_len && data[l] == x) { ++l; } |
| 118 | + --ep; |
| 119 | + if (data-search > 1 && x == data[-2]) |
| 120 | + --ep; |
| 121 | + } |
| 122 | + else if (data-search > 1 && x == data[-2]) // x == z == data[-2], y == data[-1] |
| 123 | + { |
| 124 | + // Repeating the last two bytes |
| 125 | + o = 2; |
| 126 | + l = 3; |
| 127 | + while (l < max_len && data[l] == y) { ++l; if (l < max_len && data[l] == x) { ++l; } else break; } |
| 128 | + --ep; |
| 129 | + } |
| 130 | + |
| 131 | + // Found the best match, stop now |
| 132 | + if (l == max_len) { *offset = o; return l; } |
| 133 | + } |
| 134 | + |
| 135 | + // Do an exhaustive search (with the possible positions) |
| 136 | + if (ep < 0) { ep += (e=e2)->size-1; e2 = &DummyEntry; } |
| 137 | + do |
| 138 | + { |
| 139 | + for (; ep >= 0 && e->pos[ep] >= search; --ep) |
| 140 | + { |
| 141 | + const const_bytes ss = e->pos[ep]; |
| 142 | + if (ss[2] == z) |
| 143 | + { |
| 144 | + const_bytes s = ss+3; |
| 145 | + uint_fast16_t i = 3; |
| 146 | + if (s == data) { s = ss; } |
| 147 | + while (i < max_len && data[i] == *s) |
| 148 | + { |
| 149 | + ++i; |
| 150 | + if (++s == data) { s = ss; } // allow looping back, can have l > o |
| 151 | + } |
| 152 | + if (i > l) { o = (uint_fast16_t)(data-ss); if ((l = i) == max_len) { break; } } |
| 153 | + } |
| 154 | + } |
| 155 | + ep = (e=e2)->size - 1; |
| 156 | + e2 = &DummyEntry; |
| 157 | + } while (ep >= 0); |
| 158 | + |
| 159 | + // Found a match, return it |
| 160 | + if (l >= 3) |
| 161 | + { |
| 162 | + *offset = o; |
| 163 | + return l; |
| 164 | + } |
| 165 | + } |
| 166 | + } |
| 167 | + |
| 168 | + // No match found, return 0 |
| 169 | + return 0; |
| 170 | +} |
0 commit comments