rero
diff --git a/‎data/apisources.yml‎
Lines changed: 5 additions & 0 deletions b/‎data/apisources.yml‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎data/organisations.json‎
Lines changed: 1 addition & 1 deletion b/‎data/organisations.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎rero_ils/config.py‎
Lines changed: 6 additions & 0 deletions b/‎rero_ils/config.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎rero_ils/modules/api_harvester/memovs/__init__.py‎
Lines changed: 18 additions & 0 deletions b/‎rero_ils/modules/api_harvester/memovs/__init__.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎rero_ils/modules/api_harvester/memovs/api.py‎
Lines changed: 189 additions & 0 deletions b/‎rero_ils/modules/api_harvester/memovs/api.py‎
Lines changed: 189 additions & 0 deletions
diff --git a/‎rero_ils/modules/api_harvester/memovs/dojson/__init__.py‎
Lines changed: 18 additions & 0 deletions b/‎rero_ils/modules/api_harvester/memovs/dojson/__init__.py‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎rero_ils/modules/api_harvester/memovs/dojson/json/__init__.py‎
Lines changed: 22 additions & 0 deletions b/‎rero_ils/modules/api_harvester/memovs/dojson/json/__init__.py‎
Lines changed: 22 additions & 0 deletions
@@ -26,3 +26,8 @@ NJ-CANTOOK:
     url: https://bm.ebibliomedia.ch
     classname: 'rero_ils.modules.api_harvester.cantook.api.ApiCantook'
     code: 'ebibliomedia'
+
+VS-MEMO:
+    url: https://archives.memovs.ch/docs/api/
+    classname: 'rero_ils.modules.api_harvester.memovs.api.ApiMemovs'
+    code: 'memovs'
@@ -19,7 +19,7 @@
     "default_currency": "GBP",
     "current_budget_pid": "2",
     "online_harvested_source": [
-      "mv-cantook"
+      "mv-cantook", "memovs"
     ],
     "collection_enabled_on_public_view": false
   },
 
@@ -525,6 +525,12 @@ def _(x):
         "kwargs": {"name": "NJ-CANTOOK"},
         "enabled": False,
     },
+    "celery.harvest-vs-memo": {
+        "task": "rero_ils.modules.api_harvester.tasks.harvest_records",
+        "schedule": schedules.crontab(minute=55, hour=5),  # Every day at 05:55 UTC,
+        "kwargs": {"name": "VS-MEMO"},
+        "enabled": False,
+    },
 }
 
 INDEXER_BULK_REQUEST_TIMEOUT = 60
 
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Memovs API harvester module."""
@@ -0,0 +1,189 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""API for Memovs audiovisual archives records."""
+
+from invenio_db import db
+from requests import codes as requests_codes
+
+from rero_ils.modules.documents.api import Document, DocumentsSearch
+from rero_ils.modules.holdings.api import Holding, HoldingsSearch, create_holding
+from rero_ils.modules.utils import JsonWriter, requests_retry_session
+
+from ..api import ApiHarvest
+from ..models import HarvestActionType
+from .dojson.json import memovs_json
+
+
+class ApiMemovs(ApiHarvest):
+    """ApiMemovs class.
+
+    Class for harvesting audiovisual archives from Memovs API.
+    """
+
+    def __init__(self, name, file_name=None, process=False, harvest_count=-1, verbose=False):
+        """Class init."""
+        super().__init__(
+            name=name,
+            process=process,
+            harvest_count=harvest_count,
+            verbose=verbose,
+        )
+        if file_name:
+            self.file = JsonWriter(file_name)
+        self._vendor = "MEMOVS"
+
+    def get_request_url(self, start_date="1990-01-01", page=1):
+        """Get request URL.
+
+        :param start_date: date from where records has to be harvested
+        :param page: page from where records have to be harvested
+        :returns: request url
+        """
+        params = f"from={start_date}&currentPage={page}"
+        return f"{self._url}?{params}"
+
+    def delete_holdings(self, document_pid):
+        """Delete holdings.
+
+        :param document_pid: document pid
+        """
+        for hold_pid in list(Holding.get_holdings_pid_by_document_pid(document_pid)):
+            if holding := Holding.get_record_by_pid(hold_pid):
+                for electronic_location in holding.get("electronic_location", []):
+                    if electronic_location.get("source") == self._code:
+                        holding.delete(dbcommit=True, delindex=True)
+                        break
+
+    def create_holdings(self, document_pid, link):
+        """Create holdings.
+
+        :param document_pid: document pid
+        :param link: link to memovs document
+        """
+        holdings = []
+        for info in self._info.values():
+            item_type_pid = info["item_type_pid"]
+            for location_pid, url in info["locations"].items():
+                if url:
+                    # Use organization-specific URL if available
+                    link_parts = link.split("/")[3:]
+                    link_parts.insert(0, url.rstrip("/"))
+                    link = "/".join(link_parts)
+                # Check if the holding already exists
+                query = (
+                    HoldingsSearch()
+                    .filter("term", document__pid=document_pid)
+                    .filter("term", location__pid=location_pid)
+                    .filter("term", holdings_type="electronic")
+                    .filter("term", electronic_location__source=self._code)
+                )
+                if query.count() == 0:
+                    holding = create_holding(
+                        document_pid=document_pid,
+                        location_pid=location_pid,
+                        item_type_pid=item_type_pid,
+                        electronic_location={"source": self._code, "uri": link},
+                        holdings_type="electronic",
+                    )
+                    holdings.append(holding)
+        db.session.commit()
+        for holding in holdings:
+            holding.reindex()
+
+    def create_update_record(self, data):
+        """Create, update or delete record.
+
+        :param data: data for record operation
+        :returns: harvested id and status
+        """
+        status = HarvestActionType.NOTSET
+        record = None
+        record_data = memovs_json.do(data)
+        if record_data.pop("deleted", None):
+            status = HarvestActionType.DELETED
+        link = record_data.pop("link", None)
+
+        # Get harvested ID
+        harvested_id = record_data.pop("pid")
+        # Check if document already exists
+        query = DocumentsSearch().filter("term", identifiedBy__value__raw=harvested_id).source(includes=["pid"])
+        try:
+            pid = next(query.scan()).pid
+        except StopIteration:
+            pid = None
+
+        if pid:
+            if doc := Document.get_record_by_pid(pid):
+                if status == HarvestActionType.DELETED:
+                    self._count_del += 1
+                    self.delete_holdings(document_pid=doc.pid)
+                    # Try to delete document
+                    doc.pop("harvested", None)
+                    if not doc.reasons_not_to_delete():
+                        doc.delete(dbcommit=True, delindex=True)
+                else:
+                    self._count_upd += 1
+                    status = HarvestActionType.UPDATED
+                    record_data["pid"] = doc.pid
+                    record = doc.replace(data=record_data, dbcommit=True, reindex=True)
+                    if link:
+                        self.create_holdings(document_pid=record.pid, link=link)
+        elif status == HarvestActionType.NOTSET:
+            self._count_new += 1
+            status = HarvestActionType.CREATED
+            record = Document.create(data=record_data, dbcommit=True, reindex=True)
+            if link:
+                self.create_holdings(document_pid=record.pid, link=link)
+        return harvested_id, status
+
+    def harvest_records(self, from_date):
+        """Harvest Memovs records.
+
+        :param from_date: record changed after this date to get
+        :returns: count and total items
+        """
+        self._count = 0
+        url = self.get_request_url(start_date=from_date, page=1)
+        request = requests_retry_session().get(url)
+
+        if request.status_code != requests_codes.ok:
+            self.verbose_print(f"Error fetching data: {request.status_code}")
+            return self._count, 0
+
+        response_data = request.json()
+        total_pages = response_data.get("totalPages", 0)
+        total_items = response_data.get("totalDocuments", 0)
+        current_page = response_data.get("currentPage", 1)
+
+        while (
+            request.status_code == requests_codes.ok
+            and current_page <= total_pages
+            and (self.harvest_count < 0 or self._count < self.harvest_count)
+        ):
+            self.verbose_print(f"API page: {current_page}/{total_pages} url: {url}")
+            self.process_records(response_data.get("documents", []))
+
+            # Get next page
+            current_page += 1
+            if current_page <= total_pages:
+                url = self.get_request_url(start_date=from_date, page=current_page)
+                request = requests_retry_session().get(url)
+                if request.status_code == requests_codes.ok:
+                    response_data = request.json()
+
+        return self._count, total_items
@@ -0,0 +1,18 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Memovs dojson module."""
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+#
+# RERO ILS
+# Copyright (C) 2024 RERO
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, version 3 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+"""Memovs json transformation."""
+
+from .model import Transformation
+
+memovs_json = Transformation()