Skip to content

Commit 1296c99

Browse files
committed
feat(thumbnails): add cover image support with rero-invenio-thumbnails
* Add rero-invenio-thumbnails dependency for book cover retrieval * Configure RERO_INVENIO_THUMBNAILS_FILES_DIR in config.py * Fix Flask endpoint name: api_thumbnails.get_thumbnail * Fix URL parameter bug: weight → width API Endpoints: * Add /cover/<isbn> API endpoint for cover image retrieval - Supports ISBN-10 and ISBN-13 formats - Accepts cached, width, and height query parameters - Returns JSON with thumbnail URL and metadata Template Filters: * Add get_cover_art template filter for Jinja2 templates - Supports electronicLocator coverImage URLs - Falls back to ISBN-based thumbnail lookup - Handles multiple ISBNs in sorted order CLI Commands: * Add add_cover_urls command to bulk process documents - Supports --commit flag for database persistence - Supports --cached/--no-cached for thumbnail caching - Supports --scroll for Elasticsearch timeout configuration - Supports --pids option for processing specific documents - Excludes documents already having cover images - Sorts PIDs numerically for consistent processing Document Extensions: * Add AddCoverUrlExtension for automatic cover URL injection - Automatically adds cover URLs on document create/commit - Checks for existing cover images to avoid duplicates - Configurable caching support Tasks: * Add add_cover_urls shared task for batch processing - Used by CLI command and can be called programmatically - Supports filtering by PIDs or processing all documents - Updates timestamp tracking for monitoring Co-Authored-by: Peter Weber <[email protected]>
1 parent 382f6b0 commit 1296c99

File tree

20 files changed

+815
-93
lines changed

20 files changed

+815
-93
lines changed

data/thumbnails/9782607000423.jpg

22.7 KB
Loading

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ dependencies = [
5252
## RERO specific python modules
5353
"rero-invenio-base (>=0.3.0)",
5454
"rero-invenio-files (>=1.0.0,<2.0.0)",
55+
"rero-invenio-thumbnails @ git+https://github.com/rero/rero-invenio-thumbnails.git",
5556
"flask-wiki (>=1.0.0)",
5657
## RERO ILS specific python modules
5758
"PyYAML (>=5.3.1)",

rero_ils/config.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3499,6 +3499,9 @@ def _(x):
34993499
#: page is displayed on RERO-ILS frontpage.
35003500
RERO_ILS_UI_GIT_HASH = None
35013501

3502+
#: Thumbnails directory configuration
3503+
RERO_INVENIO_THUMBNAILS_FILES_DIR = "./data/thumbnails"
3504+
35023505
#: RERO_ILS MEF base url could be changed.
35033506
RERO_ILS_MEF_REF_BASE_URL = os.environ.get("RERO_ILS_MEF_REF_BASE_URL", "mef.rero.ch")
35043507
#: RERO_ILS MEF specific configurations.
@@ -3536,8 +3539,6 @@ def _(x):
35363539

35373540
RERO_ILS_HELP_PAGE = "https://github.com/rero/rero-ils/wiki/Public-demo-help"
35383541

3539-
#: Cover service
3540-
RERO_ILS_THUMBNAIL_SERVICE_URL = "https://services.test.rero.ch/cover"
35413542

35423543
#: Entities
35433544
RERO_ILS_AGENTS_SOURCES = ["idref", "gnd", "rero"]

rero_ils/modules/cli/utils.py

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
import yaml
3939
from celery import current_app as current_celery
4040
from dojson.contrib.marc21.utils import create_record
41-
from elasticsearch_dsl.query import Q
4241
from flask import current_app
4342
from flask.cli import with_appcontext
4443
from invenio_db import db
@@ -55,9 +54,9 @@
5554
from werkzeug.local import LocalProxy
5655
from werkzeug.security import gen_salt
5756

58-
from rero_ils.modules.documents.api import Document, DocumentsSearch
57+
from rero_ils.modules.documents.api import Document
5958
from rero_ils.modules.documents.dojson.contrib.marc21tojson.rero import marc21
60-
from rero_ils.modules.documents.views import get_cover_art
59+
from rero_ils.modules.documents.tasks import add_cover_urls as task_add_cover_urls
6160
from rero_ils.modules.entities.remote_entities.api import RemoteEntity
6261
from rero_ils.modules.files.cli import load_files
6362
from rero_ils.modules.items.api import Item
@@ -1579,26 +1578,25 @@ def token_create(name, user, scopes, internal, access_token):
15791578
click.secho("No user found", fg="red")
15801579

15811580

1582-
@utils.command("add_cover_urls")
1583-
@click.option("-v", "--verbose", "verbose", is_flag=True, default=False)
1581+
@utils.command()
1582+
@click.option("-c", "--commit", "commit", is_flag=True, default=False, help="Commit changes to database.")
1583+
@click.option("-v", "--verbose", "verbose", is_flag=True, default=False, help="Verbose print.")
1584+
@click.option("--cached/--no-cached", "cached", default=True, help="Use cached thumbnails.")
1585+
@click.option("-s", "--scroll", "scroll", default="60m", help="Elasticsearch scroll timeout.")
1586+
@click.option(
1587+
"-p", "--pids", "pids_file", type=click.File("r"), default=None, help="File with document pids to process."
1588+
)
15841589
@with_appcontext
1585-
def add_cover_urls(verbose):
1590+
def add_cover_urls(commit, verbose, cached, scroll, pids_file):
15861591
"""Add cover urls to all documents with isbns."""
1587-
click.secho("Add cover urls.", fg="green")
1588-
search = (
1589-
DocumentsSearch()
1590-
.filter("term", identifiedBy__type="bf:Isbn")
1591-
.filter("bool", must_not=[Q("term", electronicLocator__content="coverImage")])
1592-
.params(preserve_order=True)
1593-
.sort({"pid": {"order": "asc"}})
1594-
.source("pid")
1595-
)
1596-
for idx, hit in enumerate(search.scan()):
1597-
pid = hit.pid
1598-
record = Document.get_record_by_pid(pid)
1599-
url = get_cover_art(record=record, save_cover_url=True)
1592+
1593+
pids = None
1594+
if pids_file:
1595+
pids = [line.strip() for line in pids_file if line.strip()]
16001596
if verbose:
1601-
click.echo(f"{idx}:\tdocument: {pid}\t{url}")
1597+
click.secho(f"Processing {len(pids)} documents from file", fg="green")
1598+
1599+
return task_add_cover_urls(commit=commit, verbose=verbose, cached=cached, scroll=scroll, pids=pids)
16021600

16031601

16041602
@utils.command()

rero_ils/modules/documents/api.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242

4343
from .dumpers import document_indexer_dumper, document_replace_refs_dumper
4444
from .extensions import (
45+
AddCoverUrlExtension,
4546
AddMEFPidExtension,
4647
EditionStatementExtension,
4748
ProvisionActivitiesExtension,
@@ -115,13 +116,14 @@ class Document(IlsRecord):
115116
enable_jsonref = False
116117

117118
_extensions = [
118-
OperationLogObserverExtension(),
119+
AddCoverUrlExtension(),
119120
AddMEFPidExtension("subjects", "contribution", "genreForm"),
121+
DeleteRelatedLocalFieldExtension(),
122+
EditionStatementExtension(),
123+
OperationLogObserverExtension(),
120124
ProvisionActivitiesExtension(),
121125
SeriesStatementExtension(),
122-
EditionStatementExtension(),
123126
TitleExtension(),
124-
DeleteRelatedLocalFieldExtension(),
125127
]
126128

127129
def _validate(self, **kwargs):
@@ -490,13 +492,16 @@ def document_types(self):
490492
document_types.append(main_type)
491493
return document_types or ["docmaintype_other"]
492494

493-
def add_cover_url(self, url, dbcommit=False, reindex=False):
495+
def add_cover_url(self, url, dbcommit=False, reindex=False, force=False):
494496
"""Adds electronicLocator with coverImage to document."""
495497
electronic_locators = self.get("electronicLocator", [])
496498
for electronic_locator in electronic_locators:
497499
e_content = electronic_locator.get("content")
498500
e_type = electronic_locator.get("type")
499-
if e_content == "coverImage" and e_type == "relatedResource" and electronic_locator.get("url") == url:
501+
if e_content == "coverImage" and e_type == "relatedResource":
502+
if force:
503+
electronic_locator["url"] = url
504+
break
500505
return self, False
501506
electronic_locators.append({"content": "coverImage", "type": "relatedResource", "url": url})
502507
self["electronicLocator"] = electronic_locators

rero_ils/modules/documents/api_views.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,25 @@
2424
from flask import request as flask_request
2525
from invenio_jsonschemas import current_jsonschemas
2626
from invenio_jsonschemas.errors import JSONSchemaNotFound
27+
from rero_invenio_thumbnails import get_thumbnail_url
2728

2829
from rero_ils.modules.decorators import check_logged_as_librarian
2930

3031
from ..utils import cached
3132
from .api import Document
32-
from .utils import get_remote_cover
3333

3434
api_blueprint = Blueprint("api_documents", __name__, url_prefix="/document")
3535

3636

3737
@api_blueprint.route("/cover/<isbn>")
38-
@cached(timeout=5 * 60, query_string=True) # 5 minutes timeout
3938
def cover(isbn):
4039
"""Document cover service."""
41-
return jsonify(get_remote_cover(isbn))
40+
41+
cached = flask_request.args.get("cached", default="true").lower() != "false"
42+
url, provider = get_thumbnail_url(isbn, cached=cached)
43+
if url:
44+
return jsonify({"success": True, "image": url, "isbn": isbn, "provider": provider})
45+
return jsonify({"success": False, "isbn": isbn})
4246

4347

4448
@api_blueprint.route("/<pid>/availability", methods=["GET"])

rero_ils/modules/documents/dojson/contrib/marc21tojson/loc/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
#
33
# RERO ILS
4-
# Copyright (C) 2019-2022 RERO
4+
# Copyright (C) 2019-2026 RERO
55
# Copyright (C) 2019-2022 UCLOUVAIN
66
#
77
# This program is free software: you can redistribute it and/or modify

rero_ils/modules/documents/dojson/contrib/marc21tojson/slsp/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# -*- coding: utf-8 -*-
22
#
33
# RERO ILS
4-
# Copyright (C) 2019-2022 RERO
4+
# Copyright (C) 2019-2026 RERO
55
# Copyright (C) 2019-2022 UCLOUVAIN
66
#
77
# This program is free software: you can redistribute it and/or modify

rero_ils/modules/documents/extensions/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@
1818

1919
"""Document record extensions."""
2020

21+
from .add_cover_url import AddCoverUrlExtension
2122
from .add_mef_pid import AddMEFPidExtension
2223
from .edition_statement import EditionStatementExtension
2324
from .provision_activities import ProvisionActivitiesExtension
2425
from .series_statement import SeriesStatementExtension
2526
from .title import TitleExtension
2627

2728
__all__ = (
29+
"AddCoverUrlExtension",
2830
"AddMEFPidExtension",
2931
"EditionStatementExtension",
3032
"ProvisionActivitiesExtension",
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# RERO ILS
4+
# Copyright (C) 2026 RERO
5+
#
6+
# This program is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU Affero General Public License as published by
8+
# the Free Software Foundation, version 3 of the License.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU Affero General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU Affero General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
18+
"""Document record extension to add cover URL to electronicLocator."""
19+
20+
from invenio_db import db
21+
from invenio_records.extensions import RecordExtension
22+
from rero_invenio_thumbnails import get_thumbnail_url
23+
24+
25+
class AddCoverUrlExtension(RecordExtension):
    """Adds cover URL to electronicLocator based on ISBN.

    When a record has no ``coverImage`` electronic locator yet, the ISBNs
    found in ``identifiedBy`` are looked up with ``get_thumbnail_url`` and the
    first URL found is appended to ``electronicLocator``.
    """

    def __init__(self, cached=True):
        """Initialization.

        :param cached: Use cached thumbnails; forwarded as ``cached`` to
            ``get_thumbnail_url``. Default: True.
        """
        self.cached = cached

    def add_cover_url(self, record):
        """Add cover URL to electronicLocator if ISBN is present.

        The record is modified in place. Nothing is changed when the record
        already has a ``coverImage`` electronic locator, when it has no usable
        ISBN, or when no thumbnail URL is found for any of its ISBNs.

        :param record: dict - a document record.
        """
        # An existing cover image (whatever its URL) is never duplicated.
        electronic_locators = record.get("electronicLocator", [])
        if any(loc.get("content") == "coverImage" for loc in electronic_locators):
            return

        # Collect the ISBN values from identifiedBy. Entries without a value
        # are skipped (sorting ``None`` together with strings would raise a
        # TypeError) and duplicates are dropped so that each ISBN is looked
        # up only once.
        isbns = {
            identified_by.get("value")
            for identified_by in record.get("identifiedBy", [])
            if identified_by.get("type") == "bf:Isbn" and identified_by.get("value")
        }
        if not isbns:
            return

        # Try the ISBNs in sorted order and keep the first thumbnail found.
        for isbn in sorted(isbns):
            url, provider = get_thumbnail_url(isbn, cached=self.cached)
            if url:
                record["electronicLocator"] = record.get("electronicLocator", [])
                record["electronicLocator"].append(
                    {"type": "relatedResource", "content": "coverImage", "url": url, "note": provider}
                )
                break

    def post_create(self, record):
        """Called after a record is created.

        :param record: dict - the record to be modified.
        """
        self.add_cover_url(record)
        if record.model:
            # Copy the (possibly updated) record data to the database model.
            with db.session.begin_nested():
                record.model.data = record
                db.session.add(record.model)

    def post_commit(self, record):
        """Called after a record is committed.

        :param record: dict - the record to be modified.
        """
        self.add_cover_url(record)
        if record.model:
            with db.session.begin_nested():
                record.model.data = record
                # Note: session merge is not required as it is done by the
                # record.commit

0 commit comments

Comments
 (0)