Skip to content

Added LRU TTL Cache Implementation #1391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
.PHONY: install clean test retest coverage docs

install:
pip install -e .[docs,test,async]
pip install -e .[docs,test,async,ttlcache]
pip install bumpversion twine wheel

lint:
Expand Down
36 changes: 36 additions & 0 deletions docs/transport.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,42 @@ Changing the SqliteCache settings can be done via:
Another option is to use the InMemoryCache backend. It internally uses a
global dict to store urls with the corresponding content.

A further option is to use TTLCache, a Least Recently Used (LRU) cache implementation with a per-item time-to-live (TTL) value.
When you want to use TTLCache you will need to install the python
cachetools module. This can be done by installing the ``ttlcache`` extras::

pip install zeep[ttlcache]

Benefits of using TTLCache:

- TTLCache removes entries once they expire, whereas InMemoryCache only marks entries as expired and still holds them in memory.

- TTLCache lets you specify max size and a per item TTL.

.. code-block:: python

from zeep import Client
from zeep.cache import TTLCache
from zeep.transports import Transport

transport = Transport(cache=TTLCache(maxsize=5, ttl=10))
client = Client(
'http://www.webservicex.net/ConvertSpeed.asmx?WSDL',
transport=transport)

Parameters:

- maxsize: The maximum size of the cache. When this size is reached, the least recently used items are discarded from the cache. Note: by default, maxsize is measured in number of items, not bytes. This can be overridden by passing a custom getsizeof function.

- ttl: The time-to-live value of the cache’s items. After ttl seconds, the entry will expire and be removed from the cache.

- getsizeof: In general, a cache’s size is the total size of its item’s values. Therefore, Cache provides a getsizeof() method, which returns the size of a given value. The default implementation of getsizeof() returns 1 irrespective of its argument, making the cache’s size equal to the number of its items, or len(cache).

.. code-block:: python

import sys
def getsizeof(value):
return sys.getsizeof(value)

HTTP Authentication
-------------------
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

async_require = ["httpx>=0.15.0"]

# Optional dependency for zeep.cache.TTLCache. Use a lower bound (like the
# other extras in this file) rather than an exact pin, so installing the extra
# does not conflict with applications that need a different cachetools release.
ttlcache_require = ["cachetools>=5.3.1"]

xmlsec_require = [
"xmlsec>=0.6.1",
]
Expand Down
49 changes: 49 additions & 0 deletions src/zeep/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@
except ImportError:
sqlite3 = None # type: ignore

# cachetools is an optional dependency (installed via the ``ttlcache`` extra).
# ``Cache`` is left as None when it is missing so that TTLCache.__init__ can
# raise a clear RuntimeError instead of failing at import time.
try:
from cachetools import TTLCache as Cache
except ImportError:
Cache = None

# Module-level store shared by every TTLCache instance; it is created by the
# first TTLCache() construction and reused by all later ones.
_ttl_cache = None


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -164,6 +172,47 @@ def get(self, url):
logger.debug("Cache MISS for %s", url)


class TTLCache(Base):
    """LRU cache with a time-to-live (TTL), backed by ``cachetools.TTLCache``.

    All instances share a single module-level store (``_ttl_cache``). The
    store is created by the first instance; the ``maxsize``, ``ttl`` and
    ``getsizeof`` arguments of later instances are not applied to it.
    """

    def __init__(self, maxsize: float, ttl: float, getsizeof=None):
        """Initialize the shared TTL cache if it does not exist yet.

        Args:
            maxsize (float): The maximum size of the cache. When this size is
                reached, least recently used items are discarded. By default
                the size is measured in number of items, not bytes; this can
                be overridden with ``getsizeof``.
            ttl (float): The time-to-live of the cache's items, in seconds.
                After ``ttl`` seconds an entry expires and is removed.
            getsizeof (callable, optional): Function returning the size of a
                cached value. When None, every value has size 1, so the cache
                size equals the number of items. Defaults to None.

        Raises:
            RuntimeError: If the cachetools module is not installed.
        """
        global _ttl_cache
        if Cache is None:
            raise RuntimeError("cachetools module is required for the TTLCache")
        if _ttl_cache is None:
            logger.debug("Initializing TTL Cache.")
            _ttl_cache = Cache(maxsize=maxsize, ttl=ttl, getsizeof=getsizeof)
            return

        logger.debug("TTL Cache already initialized.")
        # The store is shared, so the settings of the first instance win.
        # Surface a mismatch instead of silently ignoring the new values.
        if _ttl_cache.maxsize != maxsize or _ttl_cache.ttl != ttl:
            logger.warning(
                "TTL Cache already initialized with maxsize=%s, ttl=%s; "
                "ignoring requested maxsize=%s, ttl=%s",
                _ttl_cache.maxsize,
                _ttl_cache.ttl,
                maxsize,
                ttl,
            )

    def add(self, url, content):
        """Store ``content`` under ``url`` in the shared cache.

        Raises:
            TypeError: If ``content`` is neither ``str`` nor ``bytes``.
        """
        logger.debug("Caching contents of %s", url)
        if not isinstance(content, (str, bytes)):
            raise TypeError(
                f"a bytes-like object is required, not {type(content).__name__}"
            )
        try:
            _ttl_cache[url] = content
        except ValueError:
            # cachetools raises ValueError when a single value is larger than
            # maxsize (possible with a custom getsizeof). Failing to cache
            # must not break loading the document, so skip it instead.
            logger.warning(
                "Not caching %s: content is larger than the cache maxsize", url
            )
            return
        logger.debug("Cached contents of %s", url)

    def get(self, url):
        """Return the cached content for ``url``, or None when it is absent."""
        try:
            content = _ttl_cache[url]
        except KeyError:
            logger.debug("Cache MISS for %s", url)
            return None
        logger.debug("Cache HIT for %s", url)
        return content

def _is_expired(value, timeout):
"""Return boolean if the value is expired"""
if timeout is None:
Expand Down
64 changes: 64 additions & 0 deletions tests/test_cache.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import time

import freezegun
import pytest
Expand Down Expand Up @@ -89,3 +90,66 @@ def test_has_not_expired(self):
value = utcnow - datetime.timedelta(seconds=timeout)
with freezegun.freeze_time(utcnow):
assert cache._is_expired(value, timeout) is False

def test_ttl_cache():
    """A value stored in the TTL cache can be read back."""
    url = "http://tests.python-zeep.org/example.wsdl"
    try:
        c = cache.TTLCache(maxsize=5, ttl=10)
        c.add(url, b"content")

        result = c.get(url)
        assert result == b"content"
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None


def test_ttl_cache_no_records():
    """Looking up a URL that was never cached returns None."""
    try:
        c = cache.TTLCache(maxsize=5, ttl=10)
        result = c.get("http://tests.python-zeep.org/example.wsdl")
        assert result is None
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None


def test_ttl_cache_has_not_expired():
    """An entry younger than its TTL is still returned."""
    url = "http://tests.python-zeep.org/example.wsdl"
    try:
        c = cache.TTLCache(maxsize=5, ttl=10)
        c.add(url, b"content")
        # NOTE(review): test_ttl_cache_has_expired states that freezegun does
        # not work with this cache - confirm this freeze has any effect here.
        freeze_dt = datetime.datetime.utcnow() + datetime.timedelta(seconds=2)
        with freezegun.freeze_time(freeze_dt):
            result = c.get(url)
            assert result == b"content"
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None


def test_ttl_cache_max_size_reached():
    """Adding more items than maxsize discards the oldest entries."""
    max_size = 3
    try:
        c = cache.TTLCache(maxsize=max_size, ttl=2)
        for i in range(5):
            c.add(f"http://tests.python-zeep.org/example{i}.wsdl", b"content")

        # Five items were added to a cache of three, so the first two are gone.
        result = c.get("http://tests.python-zeep.org/example0.wsdl")
        assert result is None
        result = c.get("http://tests.python-zeep.org/example1.wsdl")
        assert result is None
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None


def test_ttl_cache_share_data():
    """Two TTLCache instances read and write the same shared store."""
    url = "http://tests.python-zeep.org/example.wsdl"
    try:
        a = cache.TTLCache(maxsize=5, ttl=10)
        b = cache.TTLCache(maxsize=5, ttl=10)
        a.add(url, b"content")

        result = b.get(url)
        assert result == b"content"
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None


def test_ttl_cache_invalid_type():
    """Adding content that is neither str nor bytes raises TypeError."""
    try:
        a = cache.TTLCache(maxsize=5, ttl=10)
        with pytest.raises(TypeError):
            a.add("http://tests.python-zeep.org/example.wsdl", 123456)
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None


def test_ttl_cache_has_expired():
    """An entry older than its TTL is no longer returned."""
    url = "http://tests.python-zeep.org/example.wsdl"
    try:
        # Use a TTL and sleep well above typical clock granularity; a 1 ms TTL
        # with a 2 ms sleep can fail where the monotonic clock ticks coarsely.
        c = cache.TTLCache(maxsize=5, ttl=0.01)
        c.add(url, b"content")
        time.sleep(0.05)  # Adding sleep because freezegun won't work here: https://github.com/spulec/freezegun/issues/477
        result = c.get(url)
        assert result is None
    finally:
        # Reset the module-level store even when an assertion fails, so a
        # failure here cannot leak cached state into other tests.
        cache._ttl_cache = None