Skip to content

Commit c6baf5f

Browse files
authored
More robust download (#721)
* Support Python 3.13
* Remove scikit-misc
* Add scikit-misc
* docs
* More robust download
* Further improvements
* Improved download
* Remove comments
1 parent 058f2dc commit c6baf5f

File tree

5 files changed

+76
-32
lines changed

5 files changed

+76
-32
lines changed

.github/release-drafter.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
name-template: "0.10.0 🌈"
2-
tag-template: 0.10.0
1+
name-template: "0.10.1 🌈"
2+
tag-template: 0.10.1
33
exclude-labels:
44
- "skip-changelog"
55

.github/workflows/test.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ jobs:
2727
- os: ubuntu-22.04
2828
python: "3.13"
2929
run_mode: "fast"
30-
# - os: ubuntu-latest
31-
# python: "3.13"
32-
# run_mode: slow
33-
# pip-flags: "--pre"
30+
- os: ubuntu-latest
31+
python: "3.13"
32+
run_mode: slow
33+
pip-flags: "--pre"
3434

3535
env:
3636
OS: ${{ matrix.os }}

pertpy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
__author__ = "Lukas Heumos"
44
__email__ = "[email protected]"
5-
__version__ = "0.10.0"
5+
__version__ = "0.10.1"
66

77
import warnings
88

pertpy/data/_dataloader.py

Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import shutil
12
import tempfile
3+
import time
24
from pathlib import Path
35
from random import choice
46
from string import ascii_lowercase
@@ -7,6 +9,7 @@
79
import requests
810
from filelock import FileLock
911
from lamin_utils import logger
12+
from requests.exceptions import RequestException
1013
from rich.progress import Progress
1114

1215

@@ -17,7 +20,10 @@ def _download( # pragma: no cover
1720
block_size: int = 1024,
1821
overwrite: bool = False,
1922
is_zip: bool = False,
20-
) -> None:
23+
timeout: int = 30,
24+
max_retries: int = 3,
25+
retry_delay: int = 5,
26+
) -> Path:
2127
"""Downloads a dataset irrespective of the format.
2228
2329
Args:
@@ -27,6 +33,9 @@ def _download( # pragma: no cover
2733
block_size: Block size for downloads in bytes.
2834
overwrite: Whether to overwrite existing files.
2935
is_zip: Whether the downloaded file needs to be unzipped.
36+
timeout: Request timeout in seconds.
37+
max_retries: Maximum number of retry attempts.
38+
retry_delay: Delay between retries in seconds.
3039
"""
3140
if output_file_name is None:
3241
letters = ascii_lowercase
@@ -35,36 +44,71 @@ def _download( # pragma: no cover
3544
if output_path is None:
3645
output_path = tempfile.gettempdir()
3746

38-
download_to_path = (
39-
f"{output_path}{output_file_name}" if str(output_path).endswith("/") else f"{output_path}/{output_file_name}"
40-
)
47+
download_to_path = Path(output_path) / output_file_name
4148

4249
Path(output_path).mkdir(parents=True, exist_ok=True)
43-
lock_path = f"{output_path}/{output_file_name}.lock"
44-
with FileLock(lock_path):
50+
lock_path = Path(output_path) / f"{output_file_name}.lock"
51+
52+
with FileLock(lock_path, timeout=300):
4553
if Path(download_to_path).exists() and not overwrite:
4654
logger.warning(f"File {download_to_path} already exists!")
47-
return
55+
return download_to_path
56+
57+
temp_file_name = Path(f"{download_to_path}.part")
58+
59+
retry_count = 0
60+
while retry_count <= max_retries:
61+
try:
62+
head_response = requests.head(url, timeout=timeout)
63+
head_response.raise_for_status()
64+
content_length = int(head_response.headers.get("content-length", 0))
65+
66+
free_space = shutil.disk_usage(output_path).free
67+
if content_length > free_space:
68+
raise OSError(
69+
f"Insufficient disk space. Need {content_length} bytes, but only {free_space} available."
70+
)
71+
72+
response = requests.get(url, stream=True)
73+
response.raise_for_status()
74+
total = int(response.headers.get("content-length", 0))
4875

49-
temp_file_name = f"{download_to_path}.part"
76+
with Progress(refresh_per_second=5) as progress:
77+
task = progress.add_task("[red]Downloading...", total=total)
78+
with Path(temp_file_name).open("wb") as file:
79+
for data in response.iter_content(block_size):
80+
file.write(data)
81+
progress.update(task, advance=len(data))
82+
progress.update(task, completed=total, refresh=True)
5083

51-
response = requests.get(url, stream=True)
52-
total = int(response.headers.get("content-length", 0))
84+
Path(temp_file_name).replace(download_to_path)
5385

54-
with Progress(refresh_per_second=5) as progress:
55-
task = progress.add_task("[red]Downloading...", total=total)
56-
with Path(temp_file_name).open("wb") as file:
57-
for data in response.iter_content(block_size):
58-
file.write(data)
59-
progress.update(task, advance=block_size)
60-
progress.update(task, completed=total, refresh=True)
86+
if is_zip:
87+
with ZipFile(download_to_path, "r") as zip_obj:
88+
zip_obj.extractall(path=output_path)
89+
return Path(output_path)
6190

62-
Path(temp_file_name).replace(download_to_path)
91+
return download_to_path
92+
except (OSError, RequestException) as e:
93+
retry_count += 1
94+
if retry_count <= max_retries:
95+
logger.warning(
96+
f"Download attempt {retry_count}/{max_retries} failed: {str(e)}. Retrying in {retry_delay} seconds..."
97+
)
98+
time.sleep(retry_delay)
99+
else:
100+
logger.error(f"Download failed after {max_retries} attempts: {str(e)}")
101+
if Path(temp_file_name).exists():
102+
Path(temp_file_name).unlink(missing_ok=True)
103+
raise
63104

64-
if is_zip:
65-
output_path = output_path or tempfile.gettempdir()
66-
with ZipFile(download_to_path, "r") as zip_obj:
67-
zip_obj.extractall(path=output_path)
68-
zip_obj.namelist()
105+
except Exception as e:
106+
logger.error(f"Download failed: {str(e)}")
107+
if Path(temp_file_name).exists():
108+
Path(temp_file_name).unlink(missing_ok=True)
109+
raise
110+
finally:
111+
if Path(temp_file_name).exists():
112+
Path(temp_file_name).unlink(missing_ok=True)
69113

70-
Path(lock_path).unlink()
114+
return Path(download_to_path)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ requires = ["hatchling"]
44

55
[project]
66
name = "pertpy"
7-
version = "0.10.0"
7+
version = "0.10.1"
88
description = "Perturbation Analysis in the scverse ecosystem."
99
readme = "README.md"
1010
requires-python = ">=3.10,<3.14"

0 commit comments

Comments
 (0)