Skip to content
This repository was archived by the owner on Oct 10, 2023. It is now read-only.

Commit 49076cb

Browse files
committed
Initial commit
0 parents  commit 49076cb

20 files changed

+457
-0
lines changed

.ci/aptPackagesToInstall.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
libblast
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://github.com/fileTestSuite/fileTestSuite.py

.editorconfig

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
root = true
2+
3+
[*]
4+
charset = utf-8
5+
indent_style = tab
6+
indent_size = 4
7+
insert_final_newline = true
8+
end_of_line = lf
9+
10+
[*.{yml,yaml}]
11+
indent_style = space
12+
indent_size = 2

.gitattributes

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
*.imploded filter=lfs diff=lfs merge=lfs -text
2+
*.decomp filter=lfs diff=lfs merge=lfs -text

.github/.templateMarker

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
KOLANICH/python_project_boilerplate.py

.github/dependabot.yml

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: "pip"
4+
directory: "/"
5+
schedule:
6+
interval: "daily"
7+
allow:
8+
- dependency-type: "all"

.github/workflows/CI.yml

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: CI
2+
on:
3+
push:
4+
branches: [master]
5+
pull_request:
6+
branches: [master]
7+
8+
jobs:
9+
build:
10+
runs-on: ubuntu-22.04
11+
steps:
12+
- name: typical python workflow
13+
uses: KOLANICH-GHActions/typical-python-workflow@master
14+
with:
15+
github_token: ${{ secrets.GITHUB_TOKEN }}

.gitignore

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
__pycache__
2+
*.pyc
3+
*.pyo
4+
/*.egg-info
5+
*.srctrlbm
6+
*.srctrldb
7+
build
8+
dist
9+
.eggs
10+
monkeytype.sqlite3
11+
/.ipynb_checkpoints

.gitlab-ci.yml

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest
2+
3+
variables:
4+
DOCKER_DRIVER: overlay2
5+
SAST_ANALYZER_IMAGE_TAG: latest
6+
SAST_DISABLE_DIND: "true"
7+
SAST_CONFIDENCE_LEVEL: 5
8+
CODECLIMATE_VERSION: latest
9+
10+
include:
11+
- template: SAST.gitlab-ci.yml
12+
- template: Code-Quality.gitlab-ci.yml
13+
- template: License-Management.gitlab-ci.yml
14+
15+
build:
16+
tags:
17+
- shared
18+
- linux
19+
stage: build
20+
variables:
21+
GIT_DEPTH: "1"
22+
PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages
23+
24+
before_script:
25+
- export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables`
26+
- apt-get update
27+
# todo:
28+
#- apt-get -y install
29+
#- pip3 install --upgrade
30+
#- python3 ./fix_python_modules_paths.py
31+
32+
script:
33+
- python3 -m build -nw bdist_wheel
34+
- mv ./dist/*.whl ./dist/pkblast-0.CI-py3-none-any.whl
35+
- pip3 install --upgrade ./dist/*.whl
36+
- coverage run --source=pkblast -m --branch pytest --junitxml=./rspec.xml ./tests/test.py
37+
- coverage report -m
38+
- coverage xml
39+
40+
coverage: /^TOTAL(?:\s+\d+){4}\s+(\d+%).+/
41+
42+
cache:
43+
paths:
44+
- $PYTHONUSERBASE
45+
46+
artifacts:
47+
paths:
48+
- dist
49+
reports:
50+
junit: ./rspec.xml
51+
cobertura: ./coverage.xml

.gitmodules

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
[submodule "tests/testDataset"]
2+
path = tests/testDataset
3+
url = https://github.com/implode-compression-impls/implode_test_files
4+
branch = merged
5+
shallow = true

Code_Of_Conduct.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
No codes of conduct!

MANIFEST.in

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
include UNLICENSE
2+
include *.md
3+
include tests
4+
include .editorconfig

ReadMe.md

+43
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
pkblast.py [![Unlicensed work](https://raw.githubusercontent.com/unlicense/unlicense.org/master/static/favicon.png)](https://unlicense.org/)
2+
==========
3+
~~[wheel (GitLab)](https://gitlab.com/KOLANICH/pkblast.py/-/jobs/artifacts/master/raw/dist/pkblast-0.CI-py3-none-any.whl?job=build)~~
4+
~~[wheel (GHA via `nightly.link`)](https://nightly.link/implode-compression-impls/pkblast.py/workflows/CI/master/pkblast-0.CI-py3-none-any.whl)~~
5+
~~![GitLab Build Status](https://gitlab.com/KOLANICH/pkblast.py/badges/master/pipeline.svg)~~
6+
~~![GitLab Coverage](https://gitlab.com/KOLANICH/pkblast.py/badges/master/coverage.svg)~~
7+
~~[![GitHub Actions](https://github.com/implode-compression-impls/pkblast.py/workflows/CI/badge.svg)](https://github.com/implode-compression-impls/pkblast.py/actions/)~~
8+
[![Libraries.io Status](https://img.shields.io/librariesio/github/implode-compression-impls/pkblast.py.svg)](https://libraries.io/github/implode-compression-impls/pkblast.py)
9+
[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://codeberg.org/KOLANICH-tools/antiflash.py)
10+
11+
These are free and Open-Source ctypes-based bindings to [`libblast`](https://github.com/madler/zlib/tree/master/contrib/blast) by [Mark @madler Adler](https://github.com/madler), which is a Free Open-Source implementation of a decompressor of the PKWare Data Compression Library (DCL) compression format.
12+
13+
For compression you need [`pkimplode.py`](https://codeberg.org/implode-compression-impls/pkimplode.py) a separate wrapper to a separate library by another author.
14+
15+
For decompression you can alternatively use [`pwexplode`](https://github.com/Schallaven/pwexplode) a pure-python impl, but it is licensed under GPL-3.0-or-later.
16+
17+
Benefits of CTypes-based impl:
18+
19+
* Supports python versions other than CPython
20+
* No need to recompile python module after python version upgrade
21+
22+
Drawbacks:
23+
* performance and overhead may be worse, than in the case of a cext.
24+
25+
Installation
26+
------------
27+
28+
In order to make it work you need a package with `libblast` itself installed into your system using your distro package manager. If your distro doesn't provide one, you can build it yourself using CMake CPack from the sources [by the link](https://codeberg.org/implode-compression-impls/libblast). You will get 3 packages, one with the headers, another one with the shared library, and yet another one with the CLI tool. Only the one with the lib is mandatory.
29+
30+
Usage
31+
-----
32+
33+
The package contains multiple functions. They have names matching the regular expression `^decompress(Stream|Bytes(Whole|Chunked))To(Stream|Bytes)$`.
34+
35+
The first subgroup describes the type of input argument, the second subgroup describes the type of output.
36+
* If input is `Bytes`, then you need
37+
* `Whole`, which means that the lib gets a pointer to the whole array with compressed data. This is considered to be **the optimal input format**.
38+
* `Chunked` (which means the data are processed in reality by `decompressStreamTo$3`) was created mainly for convenience of testing.
39+
* Otherwise it is an object acting like a stream. In this case you can also provide `chunkSize`, because streams are processed in chunks. The larger the chunk, the fewer chunks there are in the stream and so the less overhead is spent on callback calls, but the more memory is needed to store the chunk.
40+
41+
The second subgroup describes the type of the result.
42+
* The internal type of the result is always a `Stream`. This is considered to be **the optimal output format**. It is because we don't know the size of output ahead of time, so have to use streams.
43+
* `Bytes` are only for your convenience and just wrap the `decompress$1ToStream` with a context with `BytesIO`.

UNLICENSE

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
This is free and unencumbered software released into the public domain.
2+
3+
Anyone is free to copy, modify, publish, use, compile, sell, or
4+
distribute this software, either in source code form or as a compiled
5+
binary, for any purpose, commercial or non-commercial, and by any
6+
means.
7+
8+
In jurisdictions that recognize copyright laws, the author or authors
9+
of this software dedicate any and all copyright interest in the
10+
software to the public domain. We make this dedication for the benefit
11+
of the public at large and to the detriment of our heirs and
12+
successors. We intend this dedication to be an overt act of
13+
relinquishment in perpetuity of all present and future rights to this
14+
software under copyright law.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22+
OTHER DEALINGS IN THE SOFTWARE.
23+
24+
For more information, please refer to <https://unlicense.org/>

pkblast/BlastError.py

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from enum import IntEnum
2+
3+
4+
class BlastError(IntEnum):
	"""Status codes returned by `blast`.

	Zero means success, positive codes report a lack of input or of output space, negative codes report errors in the compressed data itself."""

	success = 0

	# Positive codes: there is not enough input available or not enough output space.
	outputError = 1
	inputExhausted = 2

	# Negative codes: the source data is malformed.
	wrongLiteralFlag = -1  # the literal flag is neither zero nor one
	wrongDictionary = -2  # the dictionary size is outside of 4..6
	distanceTooBig = -3  # the distance points too far back

pkblast/__init__.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import typing
2+
from collections.abc import ByteString
3+
from io import BytesIO, IOBase
4+
from mmap import mmap
5+
from warnings import warn
6+
7+
from .BlastError import BlastError
8+
from .ctypes import _decompressBytes, _decompressStream
9+
10+
__all__ = ("decompressStreamToStream", "decompressStreamToBytes", "decompressBytesWholeToStream", "decompressBytesWholeToBytes", "decompressBytesChunkedToStream", "decompressBytesChunkedToBytes", "decompress", "DEFAULT_CHUNK_SIZE")
11+
12+
# Default value of the `chunkSize` argument of the stream-reading functions in this module.
DEFAULT_CHUNK_SIZE = 16384
13+
14+
15+
def decompressStreamToStream(inputStream: IOBase, outputStream: IOBase, chunkSize: int = DEFAULT_CHUNK_SIZE) -> typing.Tuple[int, IOBase]:
	"""Do streaming decompression from `inputStream` into `outputStream`.

	:param inputStream: the stream to read the compressed data from. May be a memory map.
	:param outputStream: the stream to write the decompressed data to.
	:param chunkSize: the size hint for the chunks in which the input is read.
	:returns: the tuple `(left, outputStream)`, where `left` is the count of input bytes that were read but not used.
	:raises Exception: wrapping the `BlastError` member for any non-zero code returned by the library."""

	errorCode, left = _decompressStream(inputStream, outputStream, chunkSize=chunkSize)

	if errorCode:
		raise Exception(BlastError(errorCode))

	# `left` is a ctypes integer object filled in by the library; unwrap it into a plain `int`.
	return left.value, outputStream
25+
26+
27+
def decompressStreamToBytes(inputStream: IOBase, chunkSize: int = DEFAULT_CHUNK_SIZE) -> typing.Tuple[int, bytes]:
	"""Decompress `inputStream` and return the decompressed data as `bytes`.

	It is a convenience wrapper that runs `decompressStreamToStream` with a temporary `BytesIO` as the output.

	:param inputStream: the stream to read the compressed data from.
	:param chunkSize: the size hint for the chunks in which the input is read.
	:returns: the tuple `(left, decompressed)`, where `left` is the count of input bytes that were read but not used.
	:raises Exception: propagated from `decompressStreamToStream`."""
	with BytesIO() as outputStream:
		left, _ = decompressStreamToStream(inputStream, outputStream, chunkSize)
		# Take the value while the `BytesIO` is still open.
		return left, outputStream.getvalue()
32+
33+
34+
def decompressBytesWholeToStream(compressed: ByteString, outputStream: IOBase) -> typing.Tuple[int, IOBase]:
	"""Decompress `compressed` into `outputStream`, processing the whole data at once.

	You should use it instead of `decompressBytesChunkedToStream`, if it is possible.

	:param compressed: the buffer with the compressed data.
	:param outputStream: the stream to write the decompressed data to.
	:returns: the tuple `(left, outputStream)`, where `left` is the count of input bytes that were not used.
	:raises Exception: wrapping the `BlastError` member for any non-zero code returned by the library."""
	errorCode, left = _decompressBytes(compressed, outputStream)

	if errorCode:
		raise Exception(BlastError(errorCode))

	# `left` is a ctypes integer object filled in by the library; unwrap it into a plain `int`.
	return left.value, outputStream
42+
43+
44+
def decompressBytesWholeToBytes(compressed: ByteString) -> typing.Tuple[int, bytes]:
	"""Decompress `compressed` and return the tuple `(remaining, decompressed)`.

	Processes the whole data at once. You should use it instead of `decompressBytesChunkedToBytes`, if it is possible.

	:param compressed: the buffer with the compressed data.
	:returns: the tuple `(remaining, decompressed)`, where `remaining` is the count of input bytes that were not used.
	:raises Exception: propagated from `decompressBytesWholeToStream`."""
	with BytesIO() as outputStream:
		left, _ = decompressBytesWholeToStream(compressed, outputStream)
		# Take the value while the `BytesIO` is still open.
		return left, outputStream.getvalue()
49+
50+
51+
def decompressBytesChunkedToStream(compressed: ByteString, outputStream: IOBase, chunkSize: int = DEFAULT_CHUNK_SIZE) -> typing.Tuple[int, IOBase]:
	"""Decompress `compressed` into `outputStream`, feeding the input the same way `decompressStreamToStream` does.

	In fact it is just a wrapper around `decompressStreamToStream` and `BytesIO`. It has bigger overhead than `decompressBytesWholeToStream` for the data that is already in memory: first the data is copied into a `BytesIO`, then space for the chunks is allocated, then the data is copied from there into the chunks, and each copying has the overhead of calling from C into python. A warning about that is emitted on every call.

	:param compressed: the buffer with the compressed data.
	:param outputStream: the stream to write the decompressed data to.
	:param chunkSize: the size hint for the chunks; it is capped by the length of `compressed`.
	:returns: the tuple `(left, outputStream)`, as returned by `decompressStreamToStream`.
	:raises Exception: propagated from `decompressStreamToStream`."""
	_efficiencyDeprecationMessage(decompressBytesChunkedToStream, decompressBytesWholeToStream)
	# A chunk larger than the whole input would only waste memory.
	chunkSize = min(chunkSize, len(compressed))
	with BytesIO(compressed) as inputStream:
		return decompressStreamToStream(inputStream, outputStream, chunkSize=chunkSize)
57+
58+
59+
def decompressBytesChunkedToBytes(compressed: ByteString, chunkSize: int = DEFAULT_CHUNK_SIZE) -> typing.Tuple[int, bytes]:
	"""Decompress `compressed` into `bytes`, feeding the input the same way `decompressStreamToStream` does.

	In fact it is just a wrapper around `decompressBytesChunkedToStream` and `BytesIO`, so it has bigger overhead than `decompressBytesWholeToBytes` for the data that is already in memory. A warning about that is emitted on every call.

	:param compressed: the buffer with the compressed data.
	:param chunkSize: the size hint for the chunks in which the input is read.
	:returns: the tuple `(left, decompressed)`, where `left` is the count of input bytes that were read but not used.
	:raises Exception: propagated from `decompressBytesChunkedToStream`."""
	_efficiencyDeprecationMessage(decompressBytesChunkedToBytes, decompressBytesWholeToBytes)
	with BytesIO() as outputStream:
		left, _ = decompressBytesChunkedToStream(compressed, outputStream, chunkSize)
		# Take the value while the `BytesIO` is still open.
		return left, outputStream.getvalue()
65+
66+
67+
def _efficiencyDeprecationMessage(calledFunc, func) -> None:
	"""Warn that `calledFunc` is an inefficient choice and that `func` should be used instead.

	:param calledFunc: the function that has been called; only its `__name__` is used.
	:param func: the recommended replacement; only its `__name__` is used."""
	# stacklevel=3 attributes the warning to the code that called the public function:
	# frame 1 is this helper, frame 2 is the public function, frame 3 is its caller.
	warn("It is inefficient to use `" + calledFunc.__name__ + "`. Use `" + func.__name__ + "` for this use case", stacklevel=3)
69+
70+
71+
# Dispatch table of `decompress`. The index is `isOutputBytes << 1 | isInputBytes`.
_functionsUseCaseMapping = (
	decompressStreamToStream,  # 0b00: stream input, stream output
	decompressBytesWholeToStream,  # 0b01: bytes input, stream output
	decompressStreamToBytes,  # 0b10: stream input, bytes output
	decompressBytesWholeToBytes,  # 0b11: bytes input, bytes output
)
77+
78+
79+
def decompress(compressed: typing.Union[ByteString, IOBase], outputStream: typing.Optional[IOBase] = None, chunkSize: int = DEFAULT_CHUNK_SIZE) -> typing.Tuple[int, typing.Union[bytes, IOBase]]:
	"""A convenience function choosing the specialized function by the types of the arguments.

	It is better to use the more specialized functions since they have less overhead; a warning naming the matching one is emitted on every call.

	:param compressed: either a stream, or `bytes`-like stuff (a `ByteString` or an `mmap`).
	:param outputStream: if it is `None`, the decompressed data is returned as `bytes`; otherwise the data is written into this stream.
	:param chunkSize: the size hint for the chunks; it is used only when `compressed` is a stream.
	:returns: the tuple `(left, output)`, where `left` is the count of bytes in the array/stream that weren't processed and `output` is either `bytes` or `outputStream`.
	:raises Exception: propagated from the selected specialized function."""

	isOutputBytes = outputStream is None
	isInputBytes = isinstance(compressed, (ByteString, mmap))

	# Bit 1 selects the kind of the output, bit 0 selects the kind of the input.
	selector = (int(isOutputBytes) << 1) | int(isInputBytes)
	func = _functionsUseCaseMapping[selector]

	# Build the positional arguments in the order shared by the specialized functions:
	# the input, then the output stream (if any), then the chunk size (for stream inputs only).
	argz = [compressed]
	if not isOutputBytes:
		argz.append(outputStream)
	if not isInputBytes:
		argz.append(chunkSize)

	_efficiencyDeprecationMessage(decompress, func)
	return func(*argz)

pkblast/ctypes.py

+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
import ctypes
2+
import platform
3+
from collections.abc import ByteString
4+
from functools import partial
5+
from mmap import mmap
6+
7+
__all__ = ("_decompressBytes", "_decompressStream")
8+
9+
# Prototype of the input callback. Arguments: `how` (an opaque pointer) and `buf` (a pointer to a pointer to unsigned bytes). Returns an unsigned 32-bit integer.
blast_in = ctypes.CFUNCTYPE(
	ctypes.c_uint32,
	ctypes.POINTER(None),
	ctypes.POINTER(ctypes.POINTER(ctypes.c_ubyte)),
)

# Prototype of the output callback. Arguments: `how` (an opaque pointer), `buf` (a pointer to unsigned bytes) and `len` (an unsigned 32-bit integer). Returns a signed 32-bit integer.
blast_out = ctypes.CFUNCTYPE(
	ctypes.c_int32,
	ctypes.POINTER(None),
	ctypes.POINTER(ctypes.c_ubyte),
	ctypes.c_uint32,
)

# Placeholder; it is replaced with the loaded library further down in this module.
lib = None
16+
17+
18+
def blast(infun: blast_in, inhow: ctypes.c_void_p, outfun: blast_out, outhow: ctypes.c_void_p, left: ctypes.POINTER(ctypes.c_uint32) = None, optionalInputArrayAndOutputLeftInfoPtr: ctypes.POINTER(ctypes.POINTER(ctypes.c_byte)) = None) -> ctypes.c_int:
	"""Call `blast` of the loaded library, passing all the arguments through unchanged.

	If there is any unused input, *left is set to the number of bytes that were read and *in points to them. Otherwise *left is set to zero and *in is set to NULL. If left or in are NULL, then they are not set.

	The annotations of this function are not just hints: `_initLibrary` reads them to set `argtypes` and `restype` of the library function, so they must stay ctypes types listed in the order of the C arguments."""

	# `lib` is resolved on every call, so the module-level name may be assigned after this definition.
	cFunction = lib.blast
	return cFunction(infun, inhow, outfun, outhow, left, optionalInputArrayAndOutputLeftInfoPtr)
22+
23+
24+
def _initLibrary():
	"""Load the `libblast` shared library and set the prototype of its `blast` function.

	The conventional file name (`libblast.dll` on Windows, `libblast.so` elsewhere) is tried first. If the loader cannot open it (for example on platforms using another suffix, or when only a versioned file is installed), the name found by `ctypes.util.find_library("blast")` is tried instead.

	:returns: the loaded `ctypes.CDLL` with `blast.argtypes` and `blast.restype` set.
	:raises OSError: if the library cannot be loaded."""
	from ctypes.util import find_library  # local import, the module itself imports only `ctypes`

	defaultName = "libblast.dll" if platform.system() == "Windows" else "libblast.so"
	try:
		library = ctypes.CDLL(defaultName)
	except OSError:
		fallbackName = find_library("blast")
		if fallbackName is None:
			# Nothing else to try; let the original loader error propagate.
			raise
		library = ctypes.CDLL(fallbackName)

	# The annotations of the python-level `blast` wrapper serve as the description of the C prototype.
	argNames = blast.__code__.co_varnames[: blast.__code__.co_argcount]
	library.blast.argtypes = [blast.__annotations__[argName] for argName in argNames]
	library.blast.restype = blast.__annotations__["return"]
	return library
33+
34+
35+
# Load the shared library once, at import time of this module; `blast` looks this name up on every call.
lib = _initLibrary()
36+
37+
38+
def outputCallback(outputStream, how, buf, l):
	"""Write `l` bytes pointed to by `buf` into `outputStream`.

	:param outputStream: the object whose `write` method receives the data; it is meant to be bound with `functools.partial`.
	:param how: the opaque pointer passed by the library; unused.
	:param buf: the pointer to the decompressed bytes.
	:param l: the count of bytes available at `buf`.
	:returns: `False` if `write` reported exactly `l` bytes written, `True` otherwise."""
	# `string_at` copies the memory straight into `bytes`, without building a list of ints first.
	written = outputStream.write(ctypes.string_at(buf, l))
	return written != l
40+
41+
42+
def inputCallbackStream(inputStream, hold, holdPtr, how, buf):
	"""Read the next chunk of `inputStream` into `hold` and hand it over through `buf`.

	:param inputStream: the object whose `readinto` method fills the chunk buffer.
	:param hold: the writable buffer the chunk is read into.
	:param holdPtr: the pointer to `hold` that is stored into `buf[0]`.
	:param how: the opaque pointer passed by the library; unused.
	:param buf: the place where the pointer to the chunk must be stored.
	:returns: the count of bytes read; 0 if nothing was read."""
	countRead = inputStream.readinto(hold)
	buf[0] = holdPtr
	# `readinto` of a non-blocking raw stream may return `None`, which is not a valid integer result.
	return countRead or 0
46+
47+
48+
def _decompressStream(inputStream, outputStream, chunkSize: int = 16384) -> (int, int):
	"""Decompress `inputStream` into `outputStream`, reading the input in chunks of `chunkSize` bytes.

	:param inputStream: the object whose `readinto` method provides the compressed data.
	:param outputStream: the object whose `write` method receives the decompressed data.
	:param chunkSize: the size of the buffer the input is read into.
	:returns: the tuple of the code returned by `blast` and the `ctypes.c_uint32` object that was passed to it as `left`."""
	# One reusable chunk buffer; the same memory is handed to the library on every input callback.
	chunkBuffer = (ctypes.c_byte * chunkSize).from_buffer(bytearray(chunkSize))
	chunkPtr = ctypes.cast(ctypes.pointer(chunkBuffer), ctypes.POINTER(ctypes.c_ubyte))

	unusedCount = ctypes.c_uint32(0)

	# The callback objects are bound to local names, so they stay referenced for the whole call.
	readChunk = blast_in(partial(inputCallbackStream, inputStream, chunkBuffer, chunkPtr))
	writeChunk = blast_out(partial(outputCallback, outputStream))

	errorCode = blast(readChunk, None, writeChunk, None, ctypes.byref(unusedCount), None)
	return (errorCode, unusedCount)
62+
63+
64+
def inputCallbackBytes(inputBytesPtr, l, how, buf):
	"""Hand the whole in-memory input over to the library.

	:param inputBytesPtr: the pointer to the compressed data; it is stored into `buf[0]`.
	:param l: the count of bytes available at `inputBytesPtr`; it is returned as is.
	:param how: the opaque pointer passed by the library; unused.
	:param buf: the place where the pointer to the data must be stored.
	:returns: `l`."""
	# Publish the pointer first, then report how many bytes are behind it.
	buf[0] = inputBytesPtr
	return l
67+
68+
69+
def _decompressBytes(inputBytes: ByteString, outputStream) -> tuple:
	"""Decompress the whole `inputBytes` buffer into `outputStream`.

	:param inputBytes: the buffer with the compressed data.
	:param outputStream: the object whose `write` method receives the decompressed data.
	:returns: the tuple of the code returned by `blast` and the `ctypes.c_uint32` object that was passed to it as `left`."""
	if isinstance(inputBytes, bytes):
		inputBytesC = ctypes.create_string_buffer(inputBytes)
	else:
		arrayType = ctypes.c_byte * len(inputBytes)
		try:
			# Share the memory of writable buffers instead of copying it.
			inputBytesC = arrayType.from_buffer(inputBytes)
		except TypeError:
			# `from_buffer` refuses read-only buffers; those have to be copied.
			inputBytesC = arrayType.from_buffer_copy(inputBytes)
	inputBytesPtr = ctypes.cast(ctypes.pointer(inputBytesC), ctypes.POINTER(ctypes.c_ubyte))
	left = ctypes.c_uint32(0)

	# The library asks for more input whenever it has used everything it was given. The whole buffer
	# must be offered only once: offering it again on truncated data would make the library silently
	# re-read the input from the start instead of reporting that the input is exhausted.
	pending = [len(inputBytes)]

	def provideInputOnce(how, buf):
		available, pending[0] = pending[0], 0
		return inputCallbackBytes(inputBytesPtr, available, how, buf)

	# The callback objects are bound to local names, so they stay referenced for the whole call.
	readAll = blast_in(provideInputOnce)
	writeChunk = blast_out(partial(outputCallback, outputStream))

	return (
		blast(readAll, None, writeChunk, None, ctypes.byref(left), None),
		left,
	)

0 commit comments

Comments
 (0)