diff --git a/qfieldsync/core/cloud_api.py b/qfieldsync/core/cloud_api.py index 05040fc6..67532642 100644 --- a/qfieldsync/core/cloud_api.py +++ b/qfieldsync/core/cloud_api.py @@ -340,7 +340,13 @@ def get_user_organizations(self, username: str) -> QNetworkReply: def get_files(self, project_id: str, client: str = "qgis") -> QNetworkReply: """Get project files and their versions""" - return self.cloud_get(["files", project_id], {"client": client}) + return self.cloud_get( + ["files", project_id], + { + "client": client, + "skip_metadata": 1, + }, + ) def get_file(self, url: QUrl, local_filename: str) -> QNetworkReply: """Download file from external URL""" diff --git a/qfieldsync/core/cloud_project.py b/qfieldsync/core/cloud_project.py index 9d69637b..74305252 100644 --- a/qfieldsync/core/cloud_project.py +++ b/qfieldsync/core/cloud_project.py @@ -21,10 +21,11 @@ import hashlib +import os import sqlite3 from enum import IntFlag from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional +from typing import Any, Dict, Iterator, List, Optional, Union from libqfieldsync.utils.qgis import get_qgis_files_within_dir from qgis.core import QgsProject @@ -40,8 +41,50 @@ class ProjectFileCheckout(IntFlag): LocalAndCloud = 3 +def calc_etag(filename: Union[str, Path], part_size: int = 8 * 1024 * 1024) -> str: + """Calculate ETag as in Object Storage (S3) of a local file. + + ETag is a MD5. But for the multipart uploaded files, the MD5 is computed from the concatenation of the MD5s of each uploaded part. + + See the inspiration of this implementation here: https://stackoverflow.com/a/58239738/1226137 + + Args: + filename (str): the local filename + part_size (int): the size of the Object Storage part. Most Object Storages use 8MB. Defaults to 8*1024*1024. + + Returns: + str: the calculated ETag value + """ + with open(filename, "rb") as f: + file_size = os.fstat(f.fileno()).st_size + + if file_size <= part_size: + BLOCKSIZE = 65536 + hasher = hashlib.md5() + + buf = f.read(BLOCKSIZE) + while len(buf) > 0: + hasher.update(buf) + buf = f.read(BLOCKSIZE) + + return hasher.hexdigest() + else: + # Say you uploaded a 14MB file and your part size is 5MB. + # Calculate 3 MD5 checksums corresponding to each part, i.e. the checksum of the first 5MB, the second 5MB, and the last 4MB. + # Then take the checksum of their concatenation. + # Since MD5 checksums are hex representations of binary data, just make sure you take the MD5 of the decoded binary concatenation, not of the ASCII or UTF-8 encoded concatenation. + # When that's done, add a hyphen and the number of parts to get the ETag. + md5sums = [] + for data in iter(lambda: f.read(part_size), b""): + md5sums.append(hashlib.md5(data).digest()) + + final_md5sum = hashlib.md5(b"".join(md5sums)) + + return "{}-{}".format(final_md5sum.hexdigest(), len(md5sums)) + + class ProjectFile: - def __init__(self, data: Dict[str, Any], local_dir: str = None) -> None: + def __init__(self, data: Dict[str, Any], local_dir: Optional[str] = None) -> None: self._local_dir = local_dir self._temp_dir = None self._data = data @@ -97,11 +140,18 @@ def size(self) -> Optional[int]: def sha256(self) -> Optional[str]: return self._data.get("sha256") + @property + def etag(self) -> Optional[str]: + # NOTE the etag is actually stored in the `md5sum` key by mistake + return self._data.get("md5sum") + @property def local_size(self) -> Optional[int]: if not self.local_path_exists: return + assert self.local_path + return self.local_path.stat().st_size @property @@ -129,6 +179,16 @@ def local_sha256(self) -> Optional[str]: with open(self.local_path, "rb") as f: return hashlib.sha256(f.read()).hexdigest() + @property + def local_etag(self) -> Optional[str]: + if not self.local_path_exists: + return + + assert self.local_path + assert self.local_path.is_file() + + return calc_etag(self.local_path) + def flush(self) -> None: if not self._local_dir: return @@ -286,7 +346,7 @@ def files_to_sync(self) -> Iterator[ProjectFile]: project_file.flush() # don't attempt to sync files that are the same both locally and remote - if project_file.sha256 == project_file.local_sha256: + if project_file.etag == project_file.local_etag: continue # ignore local files that are not in the temp directory