coderforlife
diff --git a/‎.gitignore
Lines changed: 4 additions & 0 deletions b/‎.gitignore
Lines changed: 4 additions & 0 deletions
diff --git a/‎Bitstream.cpp
Lines changed: 80 additions & 0 deletions b/‎Bitstream.cpp
Lines changed: 80 additions & 0 deletions
diff --git a/‎Bitstream.h
Lines changed: 34 additions & 0 deletions b/‎Bitstream.h
Lines changed: 34 additions & 0 deletions
diff --git a/‎Dictionary.cpp
Lines changed: 170 additions & 0 deletions b/‎Dictionary.cpp
Lines changed: 170 additions & 0 deletions
diff --git a/‎Dictionary.h
Lines changed: 37 additions & 0 deletions b/‎Dictionary.h
Lines changed: 37 additions & 0 deletions
diff --git a/‎build-c++.bat
Lines changed: 20 additions & 0 deletions b/‎build-c++.bat
Lines changed: 20 additions & 0 deletions
diff --git a/‎build-c.bat
Lines changed: 17 additions & 0 deletions b/‎build-c.bat
Lines changed: 17 additions & 0 deletions
@@ -1,3 +1,7 @@
+*.lznt1
+*.xpress
+*.xpress_huff
+
 # Compiled Object files
 *.slo
 *.lo
 
@@ -0,0 +1,80 @@
+#include "stdafx.h"
+#include "Bitstream.h"
+
+
+// Reading functions:
+// Initializes bstr for reading from the len bytes starting at in.
+// The first two 16-bit words are loaded into the mask immediately (the first word in the high
+// half), so the stream starts out with 32 valid bits and the read position just past them.
+// Requires in != NULL and len > 4 (both asserted). The write-only pointers are cleared.
+void BSReadInit(InputBitstream* bstr, const_bytes in, size_t len)
+{
+	assert(in);
+	assert(4 < len);
+	bstr->data.in = in;
+	bstr->index = 4;
+	bstr->len = len;
+	// Widen before shifting: a 16-bit operand is promoted to (signed) int, and shifting a word
+	// with its top bit set left by 16 would overflow that int.
+	bstr->mask = ((uint32_t)GET_UINT16(in) << 16) | (uint32_t)GET_UINT16(in+2);
+	bstr->bits = 32;
+	bstr->pntr[0] = NULL;
+	bstr->pntr[1] = NULL;
+}
+// Returns the next n bits of the stream, right-aligned, without consuming them.
+// Yields 0xFFFFFFFF when fewer than n bits are buffered and 0 when n is 0.
+uint32_t BSPeek(const InputBitstream* bstr, byte n)
+{
+	if (n > bstr->bits) { return 0xFFFFFFFF; }
+	if (n == 0) { return 0; } // handled separately so the shift below is never by 32
+	return bstr->mask >> (32 - n);
+}
+// Consumes n bits from the stream.
+// When fewer than 16 bits remain afterwards and at least two more input bytes are available,
+// the next 16-bit word is appended directly below the remaining bits.
+// The caller must make sure n does not exceed the number of buffered bits; this is not checked.
+void BSSkip(InputBitstream* bstr, byte n)
+{
+	// Shifting a 32-bit value by 32 is undefined, so dropping the whole mask is done explicitly
+	if (n < 32) { bstr->mask <<= n; } else { bstr->mask = 0; }
+	bstr->bits -= n;
+	if (bstr->bits < 16 && bstr->index + 2 <= bstr->len)
+	{
+		// Widen before shifting: the 16-bit word would otherwise be promoted to (signed) int
+		// and could overflow when moved up by as much as 16 bits.
+		bstr->mask |= (uint32_t)GET_UINT16(bstr->data.in + bstr->index) << (16 - bstr->bits);
+		bstr->bits |= 0x10; //bstr->bits += 16; (identical here because bits < 16)
+		bstr->index += 2;
+	}
+}
+// Reads and consumes a single bit (the top bit of the mask).
+// Returns 0xFF when no bits are buffered.
+byte BSReadBit(InputBitstream* bstr)
+{
+	byte bit;
+	if (bstr->bits == 0) { return 0xFF; }
+	bit = (byte)(bstr->mask >> 31);
+	BSSkip(bstr, 1);
+	return bit;
+}
+// Reads and consumes n bits, returning them right-aligned.
+// When BSPeek yields 0xFFFFFFFF nothing is consumed and that value is returned as-is.
+uint32_t BSReadBits(InputBitstream* bstr, byte n)
+{
+	const uint32_t value = BSPeek(bstr, n);
+	if (value == 0xFFFFFFFF) { return value; }
+	BSSkip(bstr, n);
+	return value;
+}
+
+
+// Writing functions:
+// Initializes bstr for writing into the len bytes starting at out.
+// Requires out != NULL and len >= 4 (both asserted).
+void BSWriteInit(OutputBitstream* bstr, bytes out, size_t len)
+{
+	assert(out);
+	assert(4 <= len);
+
+	// The output buffer; the position starts after the four bytes claimed by the two words below
+	bstr->data.out = out;
+	bstr->len = len;
+	bstr->index = 4;
+
+	// No bits have been accumulated yet
+	bstr->bits = 0;
+	bstr->mask = 0;
+
+	// The first two 16-bit words of the buffer will receive the first bits written
+	bstr->pntr[0] = (uint16_t*)&out[0];
+	bstr->pntr[1] = (uint16_t*)&out[2];
+}
+// Appends the low n bits of b to the stream.
+// Bits of b above the low n are expected to be zero (they would be OR-ed into bits written
+// earlier), and the pending bit count plus n must not exceed 32.
+// Once more than 16 bits are pending, the oldest 16 are stored to pntr[0] and the next 16-bit
+// word of the output is claimed from the current index.
+// Returns false if a word has to be flushed but no further word was available to take over;
+// true otherwise. Writing 0 bits always succeeds and changes nothing.
+bool BSWriteBits(OutputBitstream* bstr, uint32_t b, byte n)
+{
+	if (n == 0) { return true; } // also avoids a shift by 32 when nothing is pending
+	bstr->bits += n;
+	bstr->mask |= b << (32 - bstr->bits);
+	if (bstr->bits > 16)
+	{
+		if (bstr->pntr[1] == NULL) return false; // only 16 bits can fit into pntr[0]!
+		SET_UINT16(bstr->pntr[0], bstr->mask >> 16);
+		bstr->mask <<= 16;
+		// A plain subtraction is required: "&= 0xF" turns 32 pending bits into 0 instead of 16,
+		// which would drop the 16 bits still sitting in the mask.
+		bstr->bits -= 16;
+		bstr->pntr[0] = bstr->pntr[1];
+		if (bstr->index + 2 > bstr->len)
+		{
+			// No more uint16s are available, however we can still write 16 more bits to pntr[0]
+			bstr->pntr[1] = NULL;
+		}
+		else
+		{
+			bstr->pntr[1] = (uint16_t*)(bstr->data.out+bstr->index);
+			bstr->index += 2;
+		}
+	}
+	return true;
+}
+// Stores a whole byte at the current output position, bypassing the bit mask.
+// Returns false (writing nothing) when the position is already at the end of the buffer.
+bool BSWriteByte(OutputBitstream* bstr, byte b)
+{
+	const bool has_room = bstr->index < bstr->len;
+	if (has_room)
+	{
+		bstr->data.out[bstr->index] = b;
+		++bstr->index;
+	}
+	return has_room;
+}
+// Completes writing: stores the pending bits to the current word and clears the spare word.
+void BSWriteFinish(OutputBitstream* bstr)
+{
+	uint16_t* spare;
+	SET_UINT16(bstr->pntr[0], bstr->mask >> 16); // if !bits then mask is 0 anyways
+	spare = bstr->pntr[1];
+	if (spare != NULL) { *spare = 0; } // the second reserved word, if any, never received data
+}
@@ -0,0 +1,34 @@
+#pragma once
+
+////////////////////////////// Bitstreams //////////////////////////////////////////////////////////
+// A bitstream that allows either reading or writing, but not both at the same time.
+// It reads uint16s for bits and 16 bits can be reliably read at a time
+struct _Bitstream
+{
+	// The byte array behind the stream; which member is used depends on the direction
+	union
+	{
+		const_bytes in;	// source bytes (reading)
+		bytes out;		// destination bytes (writing)
+	} data;
+	size_t index;		// current position within the byte array
+	size_t len;			// length of the byte array
+	uint32_t mask;		// buffered bits, next bit to read/write first at the top
+	byte bits;			// how many bits of mask are valid
+	uint16_t* pntr[2];	// writing only: the 16-bit words that receive the bits of mask once enough have accumulated
+};
+
+// The same structure serves both directions; the two names only document intent
+typedef struct _Bitstream InputBitstream;
+typedef struct _Bitstream OutputBitstream;
+
+
+// Reading functions:
+// Prepares bstr for reading the len bytes at in and preloads the first 32 bits; asserts in != NULL and len > 4
+void BSReadInit(InputBitstream* bstr, const_bytes in, size_t len);
+// Returns the next n bits without consuming them; 0 if n is 0, 0xFFFFFFFF if fewer than n bits are buffered
+uint32_t BSPeek(const InputBitstream* bstr, byte n);
+// Consumes n bits, pulling in another 16-bit word when fewer than 16 bits remain and input is left; does not check that n bits are buffered
+void BSSkip(InputBitstream* bstr, byte n);
+// Reads and consumes one bit; returns 0xFF if no bits are buffered
+byte BSReadBit(InputBitstream* bstr);
+// Reads and consumes n bits; if BSPeek yields 0xFFFFFFFF that value is returned and nothing is consumed
+uint32_t BSReadBits(InputBitstream* bstr, byte n);
+
+
+// Writing functions:
+// Prepares bstr for writing into the len bytes at out; asserts out != NULL and len >= 4 and reserves the first two 16-bit words
+void BSWriteInit(OutputBitstream* bstr, bytes out, size_t len);
+// Appends n bits taken from b; returns false if a 16-bit word must be flushed but no further word is available
+bool BSWriteBits(OutputBitstream* bstr, uint32_t b, byte n);
+// Stores one whole byte at the current output position; returns false if the position is at the end of the buffer
+bool BSWriteByte(OutputBitstream* bstr, byte b);
+// Stores the pending bits to the current word and zeroes the spare reserved word, if there is one
+void BSWriteFinish(OutputBitstream* bstr);
@@ -0,0 +1,170 @@
+#include "stdafx.h"
+#include "Dictionary.h"
+
+// Implementation designed for being extremely fast at the expense of memory
+// usage. The base memory usage is 512 KB (or 768 KB on 64-bit systems). More
+// memory is always allocated but only as much as needed. Larger sized chunks
+// will consume more memory. For a series of 4 KB chunks, the extra consumed
+// memory is around 20-80 KB. For a series of 64 KB chunks, it is 200-800 KB.
+
+// This implementation is ~30x faster than the 576 KB fixed-size Dictionary!
+
+#ifdef __cplusplus_cli
+#pragma unmanaged
+#endif
+
+#pragma optimize("t", on)
+
+// Number of distinct byte values; used as both dimensions of the dictionary's entry table
+#define MAX_BYTE	0x100					// maximum byte value (+1 for 0)
+// Note: the selected argument is evaluated twice, so do not pass expressions with side effects
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))	// minimum of 2 values
+
+// An entry within the dictionary, using a dynamically resized array of positions
+// There is one Entry per pair of leading bytes; Dictionary_Add appends to pos and grows it on demand
+// NOTE(review): the 64-bit figure below counts 8+2+2 bytes of fields; alignment padding typically
+// rounds sizeof(Entry) up to 16 there - confirm before relying on the memory estimates
+typedef struct _Entry // 8+ bytes (12+ bytes on 64-bit systems)
+{
+	const_bytes* pos;		// the recorded positions (NULL until the first add)
+	uint16_t size, cap;		// number of positions in use, and number allocated
+} Entry;
+
+// The dictionary
+// A table of entries indexed by the first and second byte at a position: entries[data[0]][data[1]]
+struct _Dictionary // 512+ KB (768+ KB on 64-bit systems)
+{
+	Entry entries[MAX_BYTE][MAX_BYTE];
+};
+
+// Allocates a dictionary whose entries are all zeroed (no position arrays, zero size and capacity)
+// Returns NULL if the allocation fails
+Dictionary* Dictionary_Create()
+{
+	Dictionary* dict = (Dictionary*)malloc(sizeof(Dictionary));
+	if (dict == NULL)
+	{
+#ifdef PRINT_ERRORS
+		PRINT_ERROR("Dictionary Creation Error: malloc failed\n");
+#endif
+		return NULL;
+	}
+	// pos and cap have to start out as 0; size is cleared along with them
+	memset(dict, 0, sizeof(Dictionary));
+	return dict;
+}
+
+// Destroys a dictionary struct: frees the position array of every entry, then the struct itself
+// Passing NULL is allowed and does nothing (mirroring free)
+void Dictionary_Destroy(Dictionary* d)
+{
+	uint_fast16_t i, j;
+	if (d == NULL) { return; }
+	for (i = 0; i < MAX_BYTE; ++i)
+		for (j = 0; j < MAX_BYTE; ++j)
+			free(d->entries[i][j].pos); // entries that never grew hold NULL, which free ignores
+	free(d);
+}
+
+// Empties the dictionary so that a new chunk can be started
+// Only the entry sizes are cleared; the position arrays already allocated are kept for reuse
+// Call this after Dictionary_Create and before any Dictionary_Add/Dictionary_Find
+// This implementation cannot fail and always returns true
+bool Dictionary_Reset(Dictionary* d)
+{
+	uint_fast16_t first, second;
+	for (first = 0; first < MAX_BYTE; ++first)
+	{
+		Entry* row = d->entries[first];
+		for (second = 0; second < MAX_BYTE; ++second)
+		{
+			row[second].size = 0;
+		}
+	}
+	return true;
+}
+
+// Adds data to the dictionary, which will be used as a starting point during future finds
+// The position is filed under its first two bytes, so nothing is recorded when max_len < 2
+// Max length is how many bytes can be read from data, regardless of the end of the chunk
+// An entry holds at most 0xFFFF positions; once it is full further positions for that byte
+// pair are not recorded (the call still succeeds)
+// Returns true on success, false on error
+// On error the dictionary has been destroyed: d must not be used or destroyed again
+bool Dictionary_Add(Dictionary* d, const_bytes data, const size_t max_len)
+{
+	if (max_len >= 2)
+	{
+		const byte x = data[0], y = data[1];
+		Entry* e = d->entries[x]+y;
+		if (e->size >= e->cap)
+		{
+			uint16_t new_cap;
+			const_bytes *temp;
+
+			// size and cap are 16-bit: growing past 0xFFFF would wrap the capacity around and
+			// the store below would land outside the array
+			if (e->cap == 0xFFFF) { return true; }
+
+			// Capacity starts at 8 and doubles, with the last step going from 0x8000 to 0xFFFF
+			new_cap = (uint16_t)(e->cap ? ((e->cap >= 0x8000) ? 0xFFFF : (e->cap << 1)) : 8);
+			temp = (const_bytes*)realloc(e->pos, new_cap*sizeof(const_bytes));
+			if (temp == NULL)
+			{
+				PRINT_ERROR("Dictionary Add Error: realloc failed\n");
+				Dictionary_Destroy(d);
+				return false;
+			}
+			// Only commit the new capacity once the memory is really there
+			e->pos = temp;
+			e->cap = new_cap;
+		}
+		e->pos[e->size++] = data;
+	}
+	return true;
+}
+
+// Finds the best symbol in the dictionary(ies) for the bytes starting at data
+// d is searched first, then d2 (which may be NULL)
+// max_len is the longest match that may be reported; at least 3 bytes must be available
+// search is the earliest position a match may start at; data must lie beyond it
+// Returns the length of the string found, or 0 if nothing of length >= 3 was found
+// offset is set to the offset from the current position to the string
+// (offset is left untouched when 0 is returned)
+uint_fast16_t Dictionary_Find(const Dictionary* d, const Dictionary* d2, const_bytes data, const uint_fast16_t max_len, const_bytes search, uint_fast16_t* offset)
+{
+	static const Entry DummyEntry = { NULL, 0, 0 }; // using this instead of NULL reduces a lot of checks
+
+	if (max_len >= 3 && data-search > 0)
+	{
+		// Candidates are looked up by the first two bytes
+		const byte x = data[0], y = data[1];
+		const Entry* e = d->entries[x]+y, *e2 = d2 ? d2->entries[x]+y : &DummyEntry;
+		if (e->size || e2->size) // a match is possible
+		{
+			const byte z = data[2];
+			// l is the best length so far and o its offset; o is only read once l has been set
+			// to 3 or more, and every place that sets l also sets o
+			uint_fast16_t l = 0, o;
+			// Index of the candidate in e to examine next, starting from the last one added
+			int_fast32_t ep = e->size - 1; // need to support all uint16 values and <0
+
+			// Try short repeats - this does not use the Dictionary at all
+			// data[-1] is readable here because data > search
+			if (x == z && y == data[-1])
+			{
+				if (x == y) // x == y == z == data[-1]
+				{
+					// Repeating the last byte
+					o = 1;
+					l = 3;
+					while (l < max_len && data[l] == x)	{ ++l; }
+					// Pass over the newest candidate(s); presumably these are the positions
+					// directly behind data that the repeat already covers - verify against caller
+					--ep;
+					if (data-search > 1 && x == data[-2])
+						--ep;
+				}
+				else if (data-search > 1 && x == data[-2]) // x == z == data[-2], y == data[-1]
+				{
+					// Repeating the last two bytes
+					o = 2;
+					l = 3;
+					// Extend while the bytes keep alternating y, x, y, x, ...
+					while (l < max_len && data[l] == y)	{ ++l; if (l < max_len && data[l] == x) { ++l; } else break; }
+					--ep;
+				}
+
+				// Found the best match, stop now
+				if (l == max_len) { *offset = o; return l; }
+			}
+
+			// Do an exhaustive search (with the possible positions)
+			// If nothing is left to examine in e, move straight on to e2
+			// NOTE(review): when ep is -1 this yields e2->size-2, so the last entry of e2 is
+			// passed over even though no skip was pending - confirm whether "+= size" was meant
+			if (ep < 0) { ep += (e=e2)->size-1; e2 = &DummyEntry; }
+			do
+			{
+				// Walk the candidates from the newest down, stopping at the first one that lies
+				// before search (so positions are presumably stored in increasing order)
+				for (; ep >= 0 && e->pos[ep] >= search; --ep)
+				{
+					const const_bytes ss = e->pos[ep];
+					// The first two bytes match by construction, so compare from the third
+					if (ss[2] == z)
+					{
+						const_bytes s = ss+3;
+						uint_fast16_t i = 3;
+						if (s == data) { s = ss; }
+						while (i < max_len && data[i] == *s)
+						{
+							++i;
+							if (++s == data) { s = ss; } // allow looping back, can have l > o
+						}
+						// Keep the candidate only if it is strictly longer; this break leaves
+						// the for loop only
+						if (i > l) { o = (uint_fast16_t)(data-ss); if ((l = i) == max_len) { break; } }
+					}
+				}
+				// Continue with the second dictionary's entry; after that the dummy entry
+				// (size 0) makes ep negative and ends the loop
+				ep = (e=e2)->size - 1;
+				e2 = &DummyEntry;
+			} while (ep >= 0);
+
+			// Found a match, return it
+			if (l >= 3)
+			{
+				*offset = o;
+				return l;
+			}
+		}
+	}
+
+	// No match found, return 0
+	return 0;
+}
@@ -0,0 +1,37 @@
+#pragma once
+
+/////////////////// Dictionary /////////////////////////////////////////////////
+// The dictionary system used for LZNT1 and XPRESS compression.
+//
+// Most of the compression time is spent in the dictionary - particularly Find and Add.
+//
+// The compressor does not care about the format of the dictionary struct, it is
+// completely agnostic to it and any of the function implementations.
+
+
+struct _Dictionary;
+typedef struct _Dictionary Dictionary;
+
+
+// Creates and returns an uninitialized dictionary struct
+// Returns NULL on error (and sets errno)
+Dictionary* Dictionary_Create();
+
+// Destroys a dictionary struct
+void Dictionary_Destroy(Dictionary* d);
+
+// Resets a dictionary struct ready to start a new chunk
+// This should also be called after Dictionary_Create and before any Dictionary_Add/Dictionary_Find
+// Returns true on success, false on error (and sets errno)
+bool Dictionary_Reset(Dictionary* d);
+
+// Adds data to the dictionary, which will be used as a starting point during future finds
+// Max length is how many bytes can be read from data, regardless of the end of the chunk
+// Returns true on success, false on error
+// Note: the implementation in Dictionary.cpp calls Dictionary_Destroy(d) itself before returning false
+bool Dictionary_Add(Dictionary* d, const_bytes data, const size_t max_len);
+
+// Finds the best symbol in the dictionary(ies) for the data
+// The second dictionary may be NULL for independent chunks, or the dictionary for the previous chunk if overlap can occur
+// search is the earliest position at which a match is allowed to start
+// Returns the length of the string found, or 0 if nothing of length >= 3 was found
+// offset is set to the offset from the current position to the string
+uint_fast16_t Dictionary_Find(const Dictionary* d, const Dictionary* d2, const_bytes data, const uint_fast16_t max_len, const_bytes search, uint_fast16_t* offset);
@@ -0,0 +1,20 @@
+@echo off
+
+:: This builds using MinGW-w64 for 32 and 64 bit (http://mingw-w64.sourceforge.net/)
+:: Make sure both mingw-w32\bin and mingw-w64\bin are in the PATH
+
+:: Settings shared by the 32-bit and the 64-bit build below
+:: (the next line keeps -Werror at hand; it is not part of FLAGS)
+::-Werror
+set FLAGS=-mconsole -static-libgcc -static-libstdc++ -O3 -march=core2 -Wall -s
+set FILES=compression.cpp Dictionary.cpp Bitstream.cpp lznt1.cpp lzx.cpp xpress.cpp xpress_huff.cpp test.cpp
+set OUT=compression
+
+:: The 32-bit build produces %OUT%.exe
+echo Compiling 32-bit...
+i686-w64-mingw32-g++ %FLAGS% %FILES% -o %OUT%.exe
+
+echo.
+
+:: The 64-bit build produces %OUT%64.exe
+echo Compiling 64-bit...
+
+x86_64-w64-mingw32-g++ %FLAGS% %FILES% -o %OUT%64.exe
+
+:: Wait for a key press so the compiler output can be read
+pause
@@ -0,0 +1,17 @@
+@echo off
+
+:: This builds using MinGW-w64 for 32 and 64 bit (http://mingw-w64.sourceforge.net/)
+:: Make sure both mingw-w32\bin and mingw-w64\bin are in the PATH
+
+:: Each compiler invocation spans two lines; only the first of them ends with the ^ continuation
+:: character, otherwise whatever follows the file list would be joined onto the command
+
+:: The 32-bit build produces compression.exe
+echo Compiling 32-bit...
+i686-w64-mingw32-gcc -mconsole -static-libgcc -O3 -march=core2 -o compression.exe -s ^
+	compression.c Dictionary.c Bitstream.c lznt1.c lzx.c xpress.c xpress_huff.c test.c
+
+echo.
+
+:: The 64-bit build produces compression64.exe
+echo Compiling 64-bit...
+
+x86_64-w64-mingw32-gcc -mconsole -static-libgcc -O3 -march=core2 -o compression64.exe -s ^
+	compression.c Dictionary.c Bitstream.c lznt1.c lzx.c xpress.c xpress_huff.c test.c
+
+:: Wait for a key press so the compiler output can be read
+pause