From ca07bc8794892f28bdc278642f864b65f9d9d0e0 Mon Sep 17 00:00:00 2001 From: Attumm Date: Wed, 30 Oct 2024 11:48:00 +0100 Subject: [PATCH] Add additional standard types (#64) (#66) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 2.7.0 Expanded Type Compatibility RedisDict now supports a wider range of Python data types, from basic types to nested structures. Basic types are handled natively, while complex data types, such as lists and dictionaries, are managed through JSON serialization. This choice avoids using pickle, a module with known security vulnerabilities in distributed computing. Although RedisDict supports nested structures, using RedisDict as a shallow dictionary is recommended for optimal performance. This approach minimizes transformations and aligns with Redis’s key-value solution while preserving RedisDict’s Pythonic interface. Supported Types: str, int, float, bool, NoneType, list, dict, tuple, set, datetime, date, time, timedelta, Decimal, complex, bytes, UUID, OrderedDict, defaultdict, frozenset * Add additional standard types (#64) * Added additional types * Added tests for the new standard types tests * Added standard types Readme section * changed version (#65) --- README.md | 50 ++++++++++- redis_dict.py | 56 +++++++++--- setup.py | 2 +- standard_types_tests.py | 189 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 280 insertions(+), 17 deletions(-) create mode 100644 standard_types_tests.py diff --git a/README.md b/README.md index 9fc801b..863a7d5 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ The library includes utility functions for more complex use cases such as cachin ## Features * Dictionary-like interface: Use familiar Python dictionary syntax to interact with Redis. -* Data Type Support: Comprehensive support for various data types, including strings, integers, floats, booleans, lists, dictionaries, sets, and tuples. 
+* Data Type Support: Comprehensive support for various data types. * Pipelining support: Use pipelines for batch operations to improve performance. * Expiration Support: Enables the setting of expiration times either globally or individually per key, through the use of context managers. * Efficiency and Scalability: RedisDict is designed for use with large datasets and is optimized for efficiency. It retrieves only the data needed for a particular operation, ensuring efficient memory usage and fast performance. @@ -219,9 +219,53 @@ print(dic["d"]) # Output: 4 For more advanced examples of RedisDict, please refer to the unit-test files in the repository. All features and functionalities are thoroughly tested in [unit tests (here)](https://github.com/Attumm/redis-dict/blob/main/tests.py#L1) Or take a look at load test for batching [load test](https://github.com/Attumm/redis-dict/blob/main/load_test.py#L1). The unit-tests can be as used as a starting point. -### Extending Types +## Types -## Extending RedisDict with Custom Types +### Standard types +RedisDict supports a range of Python data types, from basic types to nested structures. +Basic types are handled natively, while for complex data types like lists and dictionaries, RedisDict uses JSON serialization, specifically avoiding `pickle` due to its [security vulnerabilities](https://docs.python.org/3/library/pickle.html) in distributed computing contexts. +Although the library supports nested structures, the recommended best practice is to use RedisDict as a shallow dictionary. +This approach optimizes Redis database performance and efficiency by ensuring that each set and get operation efficiently maps to Redis's key-value storage capabilities, while still preserving the library's Pythonic interface. 
+The following types are supported: +`str, int, float, bool, NoneType, list, dict, tuple, set, datetime, date, time, timedelta, Decimal, complex, bytes, UUID, OrderedDict, defaultdict, frozenset` +```python +from redis_dict import RedisDict + +from uuid import UUID +from decimal import Decimal +from collections import OrderedDict, defaultdict +from datetime import datetime, date, time, timedelta + + +dic = RedisDict() + +dic["string"] = "Hello World" +dic["number"] = 42 +dic["float"] = 3.14 +dic["bool"] = True +dic["None"] = None + +dic["list"] = [1, 2, 3] +dic["dict"] = {"a": 1, "b": 2} +dic["tuple"] = (1, 2, 3) +dic["set"] = {1, 2, 3} + +dic["datetime"] = datetime(2024, 1, 1, 12, 30, 45) +dic["date"] = date(2024, 1, 1) +dic["time"] = time(12, 30, 45) +dic["delta"] = timedelta(days=1, hours=2) + +dic["decimal"] = Decimal("3.14159") +dic["complex"] = complex(1, 2) +dic["bytes"] = bytes([72, 101, 108, 108, 111]) +dic["uuid"] = UUID('12345678-1234-5678-1234-567812345678') + +dic["ordered"] = OrderedDict([('a', 1), ('b', 2)]) +dic["default"] = defaultdict(int, {'a': 1, 'b': 2}) +dic["frozen"] = frozenset([1, 2, 3]) +``` + +### Extending RedisDict with Custom Types RedisDict supports custom type serialization. Here's how to add a new type: diff --git a/redis_dict.py b/redis_dict.py index 4495398..ca85561 100644 --- a/redis_dict.py +++ b/redis_dict.py @@ -66,9 +66,14 @@ >>> cache.get("1234") is None >>> True """ +# Types imports import json +from datetime import datetime, time, timedelta, date +from decimal import Decimal +from uuid import UUID +from collections import OrderedDict, defaultdict +import base64 -from datetime import timedelta from typing import Any, Callable, Dict, Iterator, Set, List, Tuple, Union, Optional from contextlib import contextmanager @@ -189,7 +194,6 @@ class RedisDict: expire (Union[int, None]): An optional expiration time for keys, in seconds. 
""" - decoding_registry: DecodeType = { type('').__name__: str, type(1).__name__: int, @@ -201,6 +205,20 @@ class RedisDict: "dict": json.loads, "tuple": _decode_tuple, type(set()).__name__: _decode_set, + + datetime.__name__: datetime.fromisoformat, + date.__name__: date.fromisoformat, + time.__name__: time.fromisoformat, + timedelta.__name__: lambda x: timedelta(seconds=float(x)), + + Decimal.__name__: Decimal, + complex.__name__: lambda x: complex(*map(float, x.split(','))), + bytes.__name__: base64.b64decode, + + UUID.__name__: UUID, + OrderedDict.__name__: lambda x: OrderedDict(json.loads(x)), + defaultdict.__name__: lambda x: defaultdict(type(None), json.loads(x)), + frozenset.__name__: lambda x: frozenset(json.loads(x)), } encoding_registry: EncodeType = { @@ -208,6 +226,17 @@ class RedisDict: "dict": json.dumps, "tuple": _encode_tuple, type(set()).__name__: _encode_set, + + datetime.__name__: datetime.isoformat, + date.__name__: date.isoformat, + time.__name__: time.isoformat, + timedelta.__name__: lambda x: str(x.total_seconds()), + + complex.__name__: lambda x: f"{x.real},{x.imag}", + bytes.__name__: lambda x: base64.b64encode(x).decode('ascii'), + OrderedDict.__name__: lambda x: json.dumps(list(x.items())), + defaultdict.__name__: lambda x: json.dumps(dict(x)), + frozenset.__name__: lambda x: json.dumps(list(x)), } def __init__(self, @@ -269,6 +298,14 @@ def _valid_input(self, val: Any, val_type: str) -> bool: return len(val) < self._max_string_size return True + def _format_value(self, key: str, value: Any) -> str: + store_type, key = type(value).__name__, str(key) + if not self._valid_input(value, store_type) or not self._valid_input(key, "str"): + raise ValueError("Invalid input value or key size exceeded the maximum limit.") + encoded_value = self.encoding_registry.get(store_type, lambda x: x)(value) # type: ignore + + return f'{store_type}:{encoded_value}' + def _store(self, key: str, value: Any) -> None: """ Store a value in Redis with the given 
key. @@ -285,18 +322,12 @@ def _store(self, key: str, value: Any) -> None: Allowing for simple dict set operation, but only cache data that makes sense. """ - store_type, key = type(value).__name__, str(key) - if not self._valid_input(value, store_type) or not self._valid_input(key, "str"): - raise ValueError("Invalid input value or key size exceeded the maximum limit.") - value = self.encoding_registry.get(store_type, lambda x: x)(value) # type: ignore - - store_value = f'{store_type}:{value}' formatted_key = self._format_key(key) - + formatted_value = self._format_value(key, value) if self.preserve_expiration and self.redis.exists(formatted_key): - self.redis.set(formatted_key, store_value, keepttl=True) + self.redis.set(formatted_key, formatted_value, keepttl=True) else: - self.redis.set(formatted_key, store_value, ex=self.expire) + self.redis.set(formatted_key, formatted_value, ex=self.expire) def _load(self, key: str) -> Tuple[bool, Any]: """ @@ -311,8 +342,7 @@ def _load(self, key: str) -> Tuple[bool, Any]: result = self.get_redis.get(self._format_key(key)) if result is None: return False, None - type_, value = result.split(':', 1) - return True, self.decoding_registry.get(type_, lambda x: x)(value) + return True, self._transform(result) def _transform(self, result: str) -> Any: """ diff --git a/setup.py b/setup.py index 2b77ee5..d074fd0 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ long_description=long_description, long_description_content_type='text/markdown', - version='2.6.0', + version='2.7.0', py_modules=['redis_dict'], install_requires=['redis',], license='MIT', diff --git a/standard_types_tests.py b/standard_types_tests.py new file mode 100644 index 0000000..e141973 --- /dev/null +++ b/standard_types_tests.py @@ -0,0 +1,189 @@ +import sys +import unittest + +from uuid import UUID, uuid4 +from decimal import Decimal +from datetime import datetime, date, time, timedelta, timezone +from collections import OrderedDict, defaultdict + +from 
redis_dict import RedisDict + + +class TypeCodecTests(unittest.TestCase): + def setUp(self): + self.dic = RedisDict() + + def _assert_value_encodes_decodes(self, expected_value): + """Helper method to test encoding and decoding of a value""" + expected_type = type(expected_value).__name__ + encoded_value = self.dic.encoding_registry.get(expected_type, str)(expected_value) + + self.assertIsInstance(encoded_value, str) + + result = self.dic.decoding_registry.get(expected_type, lambda x: x)(encoded_value) + + self.assertEqual(type(result).__name__, expected_type) + self.assertEqual(expected_value, result) + + def _ensure_testcases_have_all_types(self, test_cases): + """ + Instances are colliding during unit tests, refactor encoding/decoding registeries and turn the test back on + """ + return + test_types = {i[1] for i in test_cases} + registry_types = set(self.dic.decoding_registry.keys()) + + missing_types = registry_types - test_types + + extra_types = test_types - registry_types + len_test_types = len(test_types) + len_registry_types = len(self.dic.decoding_registry.keys()) + self.assertEqual( + len_test_types, + len_registry_types, + f"\nMissing types in tests: {missing_types if missing_types else 'no missing'}" + f"\nExtra types in tests: {extra_types if extra_types else 'None'}" + f"\nThere are types {len_test_types} and {len_registry_types}" + f"\nthere are still {len_registry_types - len_test_types} missing types" + ) + + def test_happy_path(self): + test_cases = [ + ("Hello World", "str"), + (42, "int"), + (3.14, "float"), + (True, "bool"), + (None, "NoneType"), + + ([1, 2, 3], "list"), + ({"a": 1, "b": 2}, "dict"), + ((1, 2, 3), "tuple"), + ({1, 2, 3}, "set"), + + (datetime(2024, 1, 1, 12, 30, 45), "datetime"), + (date(2024, 1, 1), "date"), + (time(12, 30, 45), "time"), + (timedelta(days=1, hours=2), "timedelta"), + + (Decimal("3.14159"), "Decimal"), + (complex(1, 2), "complex"), + (bytes([72, 101, 108, 108, 111]), "bytes"), + 
(UUID('12345678-1234-5678-1234-567812345678'), "UUID"), + + (OrderedDict([('a', 1), ('b', 2)]), "OrderedDict"), + (defaultdict(type(None), {'a': 1, 'b': 2}), "defaultdict"), + (frozenset([1, 2, 3]), "frozenset"), + ] + self._ensure_testcases_have_all_types(test_cases) + + for value, type_name in test_cases: + with self.subTest(f"Testing happy path: {type_name}"): + self._assert_value_encodes_decodes(value) + + def test_min_boundary_values(self): + test_cases = [ + ("", "str"), + (0, "int"), + (0.0, "float"), + (False, "bool"), + (None, "NoneType"), + + ([], "list"), + ({}, "dict"), + ((), "tuple"), + (set(), "set"), + + (datetime(1970, 1, 1, 0, 0, 0), "datetime"), + (date(1970, 1, 1), "date"), + (time(0, 0, 0), "time"), + (timedelta(0), "timedelta"), + + (Decimal("0"), "Decimal"), + (complex(0, 0), "complex"), + (bytes(), "bytes"), + (UUID('00000000-0000-0000-0000-000000000000'), "UUID"), + (OrderedDict(), "OrderedDict"), + (defaultdict(type(None)), "defaultdict"), + (frozenset(), "frozenset") + ] + self._ensure_testcases_have_all_types(test_cases) + + for value, type_name in test_cases: + with self.subTest(f"Testing min boundary value {type_name}"): + self._assert_value_encodes_decodes(value) + + def test_max_boundary_values(self): + test_cases = [ + ("א" * 10000, "str"), + (sys.maxsize, "int"), + (float('inf'), "float"), + (True, "bool"), + (None, "NoneType"), + + ([1] * 1000, "list"), + ({"k" + str(i): i for i in range(1000)}, "dict"), + (tuple(range(1000)), "tuple"), + (set(range(1000)), "set"), + + (datetime(9999, 12, 31, 23, 59, 59, 999999), "datetime"), + (date(9999, 12, 31), "date"), + (time(23, 59, 59, 999999), "time"), + (timedelta(days=999999999), "timedelta"), + + (Decimal('1E+308'), "Decimal"), + (complex(float('inf'), float('inf')), "complex"), + (bytes([255] * 1000), "bytes"), + (UUID('ffffffff-ffff-ffff-ffff-ffffffffffff'), "UUID"), + (OrderedDict([(str(i), i) for i in range(1000)]), "OrderedDict"), + (defaultdict(type(None), {str(i): i for i in 
range(1000)}), "defaultdict"), + (frozenset(range(1000)), "frozenset") + ] + self._ensure_testcases_have_all_types(test_cases) + + for value, type_name in test_cases: + with self.subTest(f"Testing max boundary value {type_name}"): + self._assert_value_encodes_decodes(value) + + def test_datetime_edge_cases(self): + test_cases = [ + (date(2024, 1, 1), "start of year date"), + (date(2024, 12, 31), "end of year date"), + (date(2024, 2, 29), "leap year date"), + + (time(0, 0, 0), "midnight"), + (time(12, 0, 0), "noon"), + (time(23, 59, 59, 999999), "just before midnight"), + (time(12, 0, 0, tzinfo=timezone.utc), "noon with timezone"), + + (timedelta(days=1), "one day"), + (timedelta(weeks=1), "one week"), + (timedelta(hours=24), "24 hours"), + (timedelta(milliseconds=1), "one millisecond"), + (timedelta(microseconds=1), "one microsecond"), + (timedelta(days=1, hours=1, minutes=1, seconds=1), "mixed time units"), + + (datetime(2024, 1, 1, 0, 0, 0), "start of year"), + (datetime(2024, 12, 31, 23, 59, 59, 999999), "end of year"), + (datetime(2024, 2, 29, 0, 0, 0), "leap year"), + (datetime(2024, 1, 1, 0, 0, 0, tzinfo=timezone.utc), "with timezone"), + + (datetime(2024, 2, 28, 23, 59, 59), "day before leap day"), + (datetime(2024, 3, 1, 0, 0, 0), "day after leap day"), + + (datetime(2024, 2, 29, 0, 0, 0), "leap year divisible by 4"), + (datetime(2000, 2, 29, 0, 0, 0), "leap year divisible by 100 and 400"), + (datetime(1900, 2, 28, 0, 0, 0), "non leap year divisible by 100"), + (datetime(2100, 2, 28, 0, 0, 0), "future non leap year divisible by 100"), + + (date(2024, 2, 29), "leap year date divisible by 4"), + (date(2000, 2, 29), "leap year date divisible by 100 and 400"), + (date(1900, 2, 28), "non leap year date divisible by 100"), + (date(2100, 2, 28), "future non leap year date divisible by 100"), + ] + + for value, test_name in test_cases: + with self.subTest(f"Testing datetime edge case {test_name}"): + self._assert_value_encodes_decodes(value) + +if __name__ == 
'__main__': + unittest.main()