Skip to content

Commit fe85a58

Browse files
committed
Added duplicate failsafe feature to Creator
- `zim.Creator` now supports an `ignore_duplicates: bool` parameter to prevent duplicates from raising exceptions
- `zim.Creator.add_item`, `zim.Creator.add_redirect` and `zim.Creator.add_item_for` now support a `duplicate_ok: bool` parameter to prevent an exception should this item/redirect be a duplicate
1 parent d266b4d commit fe85a58

File tree

3 files changed

+65
-3
lines changed

3 files changed

+65
-3
lines changed

Diff for: CHANGELOG.md

+10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,16 @@ All notable changes to this project are documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html) (as of version 1.5.0).
77

8+
## [1.7.0] - 2022-08-02
9+
10+
### Added
11+
12+
- `zim.Creator` now supports an `ignore_duplicates: bool` parameter to
13+
prevent duplicates from raising exceptions
14+
- `zim.Creator.add_item`, `zim.Creator.add_redirect` and `zim.Creator.add_item_for`
15+
now support a `duplicate_ok: bool` parameter to prevent an exception
16+
should this item/redirect be a duplicate
17+
818
## [1.6.3] - 2022-08-02
919

1020
### Added

Diff for: src/zimscraperlib/zim/creator.py

+36-3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import datetime
2222
import pathlib
23+
import re
2324
import weakref
2425
from typing import Any, Callable, Dict, Optional, Tuple, Union
2526

@@ -30,6 +31,13 @@
3031
from ..types import get_mime_for_name
3132
from .items import StaticItem
3233

34+
DUPLICATE_EXC_STR = re.compile(
35+
r"^Impossible to add(.+)"
36+
r"dirent\'s title to add is(.+)"
37+
r"existing dirent's title is(.+)",
38+
re.MULTILINE | re.DOTALL,
39+
)
40+
3341

3442
def mimetype_for(
3543
path: str,
@@ -76,6 +84,7 @@ def __init__(
7684
language: Optional[str] = "eng",
7785
compression: Optional[str] = None,
7886
workaround_nocancel: Optional[bool] = True,
87+
ignore_duplicates: Optional[bool] = False,
7988
**metadata: Dict[str, Union[str, datetime.date, datetime.datetime]]
8089
):
8190
super().__init__(filename=filename)
@@ -103,6 +112,7 @@ def __init__(
103112
self.metadata = metadata
104113

105114
self.workaround_nocancel = workaround_nocancel
115+
self.ignore_duplicates = ignore_duplicates
106116

107117
def start(self):
108118
super().__enter__()
@@ -129,6 +139,7 @@ def add_item_for(
129139
is_front: Optional[bool] = None,
130140
should_compress: Optional[bool] = None,
131141
delete_fpath: Optional[bool] = False,
142+
duplicate_ok: Optional[bool] = None,
132143
callback: Optional[Union[callable, Tuple[callable, Any]]] = None,
133144
):
134145
"""Add a File or content at a specified path and get its path
@@ -178,12 +189,15 @@ def add_item_for(
178189
cb += list(callback)
179190
callback = tuple(cb)
180191

181-
self.add_item(StaticItem(**kwargs), callback)
192+
self.add_item(
193+
StaticItem(**kwargs), callback=callback, duplicate_ok=duplicate_ok
194+
)
182195
return path
183196

184197
def add_item(
185198
self,
186199
item: libzim.writer.Item,
200+
duplicate_ok: Optional[bool] = None,
187201
callback: Optional[Union[Callable, Tuple[Callable, Any]]] = None,
188202
):
189203
"""Add a libzim.writer.Item
@@ -196,8 +210,14 @@ def add_item(
196210
weakref.finalize(item, callback)
197211
else:
198212
weakref.finalize(item, *callback)
213+
214+
duplicate_ok = duplicate_ok or self.ignore_duplicates
199215
try:
200-
super().add_item(item)
216+
try:
217+
super().add_item(item)
218+
except RuntimeError as exc:
219+
if not DUPLICATE_EXC_STR.match(str(exc)) or not duplicate_ok:
220+
raise exc
201221
except Exception:
202222
if self.workaround_nocancel:
203223
self.can_finish = False # pragma: no cover
@@ -209,6 +229,7 @@ def add_redirect(
209229
target_path: str,
210230
title: Optional[str] = "",
211231
is_front: Optional[bool] = None,
232+
duplicate_ok: Optional[bool] = None,
212233
):
213234
"""Add a redirect from path to target_path
214235
@@ -217,7 +238,19 @@ def add_redirect(
217238
hints = {}
218239
if is_front is not None:
219240
hints[libzim.writer.Hint.FRONT_ARTICLE] = bool(is_front)
220-
super().add_redirection(path, title, target_path, hints)
241+
242+
duplicate_ok = duplicate_ok or self.ignore_duplicates
243+
244+
try:
245+
try:
246+
super().add_redirection(path, title, target_path, hints)
247+
except RuntimeError as exc:
248+
if not DUPLICATE_EXC_STR.match(str(exc)) or not duplicate_ok:
249+
raise exc
250+
except Exception:
251+
if self.workaround_nocancel:
252+
self.can_finish = False # pragma: no cover
253+
raise
221254

222255
def add_default_illustration(self, content: bytes):
223256
self.add_illustration(48, content)

Diff for: tests/zim/test_zim_creator.py

+19
Original file line numberDiff line numberDiff line change
@@ -458,3 +458,22 @@ def cb(*args):
458458
assert not html_file.exists()
459459
assert Store.called
460460
assert Store.called == 2
461+
462+
463+
def test_duplicates(tmp_path):
464+
with Creator(tmp_path / "test.zim") as creator:
465+
creator.add_item_for(path="A", content="A")
466+
creator.add_item_for(path="C", content="C")
467+
creator.add_redirect(path="B", target_path="A")
468+
with pytest.raises(RuntimeError, match="existing dirent's title"):
469+
creator.add_item_for(path="A", content="test2")
470+
with pytest.raises(RuntimeError, match="existing dirent's title"):
471+
creator.add_redirect(path="B", target_path="C")
472+
473+
474+
def test_ignore_duplicates(tmp_path):
475+
with Creator(tmp_path / "test.zim", ignore_duplicates=True) as creator:
476+
creator.add_item_for(path="A", content="A")
477+
creator.add_item_for(path="A", content="A2")
478+
creator.add_redirect(path="B", target_path="A")
479+
creator.add_redirect(path="B", target_path="C")

0 commit comments

Comments
 (0)