Skip to content

Commit 787d641

Browse files
committed
Rm deprecated wget, replace with requests to debug reldir issue
1 parent 0d05342 commit 787d641

File tree

4 files changed

+43
-24
lines changed

4 files changed

+43
-24
lines changed

repo2data/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "2.7.0"
1+
__version__ = "2.8.0"

repo2data/repo2data.py

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
"""
88
import os
99
import json
10-
import wget
10+
import requests
1111
import subprocess
1212
import re
1313
import urllib
1414
import urllib.request
1515
import patoolib
16+
import time
1617

1718

1819
class Repo2Data():
@@ -194,23 +195,39 @@ def _already_downloaded(self):
194195

195196
return dl
196197

197-
def _wget_download(self):
198-
"""Install the data with wget library"""
199-
print("Info : Starting to download with wget %s ..." %
200-
(self._data_requirement_file["src"]))
201-
# Try it few times to avoid truncated data
202-
attempts = 0
203-
while attempts < 3:
204-
# Download with standard weblink
205-
try:
206-
wget.download(
207-
self._data_requirement_file["src"], out=self._dst_path)
208-
print(" ")
209-
attempts = 999
210-
except urllib.error.ContentTooShortError:
211-
attempts = attempts + 1
212-
print("Warning : Truncated data, retry %d ..." % (attempts))
213-
pass
198+
def _url_download(self):
    """Download the file at ``src`` into the destination directory.

    Assumes the download link points to a single file (tar/zip
    archive, etc.) and fetches it with the ``requests`` library.
    The request is attempted up to three times; both non-200
    responses and network-level errors trigger a retry.

    Returns
    -------
    str or None
        Path of the downloaded file on success, ``None`` when every
        attempt failed (a message is printed in that case).
    """
    # Local import: keeps this method self-contained instead of relying on
    # ``urllib.parse`` being loaded as a side effect of another import.
    from urllib.parse import urlsplit

    url = self._data_requirement_file["src"]
    directory = self._dst_path
    max_retries = 3
    retry_delay = 5
    # (connect, read) timeouts in seconds. Without a timeout a stalled
    # server would block forever and the retry loop would never run.
    timeout = (10, 60)
    chunk_size = 1024 * 1024

    # Use only the path component of the URL so that a query string or
    # fragment does not end up in the file name. Fall back to a generic
    # name when the path ends with "/" (empty last segment).
    filename = urlsplit(url).path.rsplit('/', 1)[-1] or 'download'
    filepath = os.path.join(directory, filename)
    # Stream into a temporary name first so an interrupted transfer never
    # leaves a truncated file at the final location.
    partial_path = filepath + '.part'

    for retry in range(max_retries):
        try:
            # The context manager releases the connection even when the
            # body is not fully consumed.
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code == 200:
                    # Create the directory if it doesn't exist
                    os.makedirs(directory, exist_ok=True)
                    with open(partial_path, 'wb') as file:
                        for chunk in response.iter_content(chunk_size=chunk_size):
                            file.write(chunk)
                    os.replace(partial_path, filepath)
                    print(f'File downloaded to: {filepath}')
                    return filepath
                failure = f'Status code: {response.status_code}'
        except requests.exceptions.RequestException as err:
            # Connection errors, timeouts and truncated bodies land here;
            # drop whatever was written and let the loop retry.
            failure = f'Error: {err}'
            if os.path.exists(partial_path):
                os.remove(partial_path)

        print(f'Attempt {retry + 1} - Failed to download the file. {failure}')
        if retry < max_retries - 1:
            print(f'Retrying in {retry_delay} seconds...')
            time.sleep(retry_delay)

    # Reaching this point means every attempt failed.
    print('Download failed after multiple attempts.')
    return None
214231

215232
def _gdrive_download(self):
216233
"""Install the data with google drive utility"""
@@ -291,13 +308,14 @@ def _osf_download(self):
291308

292309
def _scan_dl_type(self):
293310
"""Detect which function to use for download"""
294-
# if it is an http link, then we use wget
311+
# If an http(s) link is provided and the url does not match one of the providers
312+
# (osf, google, datalad, git), then fall back to requests to download the file.
295313
if ((re.match(".*?(https://).*?", self._data_requirement_file["src"])
296314
or re.match(".*?(http://).*?", self._data_requirement_file["src"]))
297315
and not re.match(".*?(\\.git)", self._data_requirement_file["src"])
298316
and not re.match(".*?(drive\\.google\\.com).*?", self._data_requirement_file["src"])
299317
and not re.match(".*?(https://osf\\.io).*?", self._data_requirement_file["src"])):
300-
self._wget_download()
318+
self._url_download()
301319
# if the source link has a .git, we use datalad
302320
elif re.match(".*?(\\.git)", self._data_requirement_file["src"]):
303321
self._datalad_download()

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ pytest==6.2.0
66
osfclient==0.0.5
77
gdown==4.2.0
88
zenodo-get==1.3.4
9+
requests

setup.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33

44
setup(name='repo2data',
55
version=repo2data.utils.get_version(),
6-
description='Automatic data fetcher from a remote server.',
6+
description='To download data from a variety of providers.',
77
url='https://github.com/SIMEXP/Repo2Data',
88
download_url='https://github.com/SIMEXP/Repo2Data/archive/v{}.tar.gz'.format(repo2data.utils.get_version()),
99
author='Loic TETREL',
10-
author_email='loic.tetrel.pro@gmail.com',
10+
author_email='roboneurolibre@gmail.com',
1111
license='MIT',
1212
packages=['repo2data'],
1313
scripts=['bin/repo2data'],
@@ -16,7 +16,7 @@
1616
'patool',
1717
#seg-fault with datalad
1818
'datalad',
19-
'wget',
19+
'requests',
2020
'osfclient',
2121
'gdown',
2222
'zenodo-get'

0 commit comments

Comments
 (0)