Skip to content

Commit

Permalink
Merge pull request #59 from tillywoodfield/58-retry-downloads
Browse files Browse the repository at this point in the history
Add retries to download functions
  • Loading branch information
andylolz authored Mar 6, 2024
2 parents 325294a + d7b742c commit e63b00c
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 26 deletions.
44 changes: 31 additions & 13 deletions iatikit/utils/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,31 @@
import zipfile

import requests
from requests.adapters import HTTPAdapter
from urllib3 import Retry

from ..standard.codelist import CodelistSet
from .config import CONFIG
from . import helpers


# Transport adapter shared by the download functions in this module: each one
# mounts it on its session's 'https://' prefix. Retry(total=3) allows up to
# three retries per request.
http_adapter = HTTPAdapter(max_retries=Retry(total=3))


def data():
session = requests.Session()
session.mount('https://', http_adapter)
path = CONFIG['paths']['registry']
# downloads from https://iati-data-dump.codeforiati.org
download_url = 'https://iati-data-dump.codeforiati.org/download'
response = requests.get(download_url)
response = session.get(download_url)
data_url = response.text.strip()
shutil.rmtree(path, ignore_errors=True)
makedirs(path)
zip_filepath = join(path, 'iati_dump.zip')

logging.getLogger(__name__).info('Downloading all IATI registry data...')
response = requests.get(data_url, stream=True)
response = session.get(data_url, stream=True)
with open(zip_filepath, 'wb') as handler:
shutil.copyfileobj(response.raw, handler)
logging.getLogger(__name__).info('Unzipping data...')
Expand All @@ -39,6 +46,8 @@ def data():


def metadata():
session = requests.Session()
session.mount('https://', http_adapter)
logging.getLogger(__name__).info(
'Downloading metadata from the IATI registry...')
path = join(CONFIG['paths']['registry'], 'metadata')
Expand All @@ -51,7 +60,7 @@ def metadata():
'?id={org_slug}'
start = 0
while True:
j = requests.get(url_tmpl.format(start=start)).json()
j = session.get(url_tmpl.format(start=start)).json()
if len(j['result']['results']) == 0:
break
for res in j['result']['results']:
Expand All @@ -60,7 +69,7 @@ def metadata():
continue
org_name = org['name']
if not exists(join(path, org_name + '.json')):
j = requests.get(org_url_tmpl.format(org_slug=org_name)).json()
j = session.get(org_url_tmpl.format(org_slug=org_name)).json()
with open(join(path, org_name + '.json'), 'w') as f:
json.dump(j['result'], f)
dataset_name = res['name']
Expand Down Expand Up @@ -94,6 +103,9 @@ def metadata():
def _get_codelist_mappings(versions):
all_codelists = CodelistSet()

session = requests.Session()
session.mount('https://', http_adapter)

path = join(CONFIG['paths']['standard'], 'codelist_mappings')
shutil.rmtree(path, ignore_errors=True)
makedirs(path)
Expand All @@ -110,7 +122,7 @@ def _get_codelist_mappings(versions):
makedirs(mapping_path)

mapping_url = tmpl.format(version=version_path)
mappings = requests.get(mapping_url).json()
mappings = session.get(mapping_url).json()

activity_mappings = [
x for x in mappings
Expand All @@ -129,40 +141,44 @@ def _get_codelist_mappings(versions):

def codelists():
def get_list_of_codelists(version):
session = requests.Session()
session.mount('https://', http_adapter)
if version in _VERY_OLD_IATI_VERSIONS:
request = requests.get(_VERY_OLD_CODELISTS_URL)
request = session.get(_VERY_OLD_CODELISTS_URL)
# import pdb; pdb.set_trace()
list_of_codelists = [x['name'] for x in csv.DictReader(
[x.decode() for x in request.iter_lines()])]
elif version in _OLD_IATI_VERSIONS:
j = requests.get(_OLD_CODELISTS_URL).json()
j = session.get(_OLD_CODELISTS_URL).json()
list_of_codelists = [x['name'] for x in j['codelist']]
else:
codelists_url = _NEW_CODELISTS_TMPL.format(
version=version.replace('.', ''))
list_of_codelists = requests.get(codelists_url).json()
list_of_codelists = session.get(codelists_url).json()
return list_of_codelists

def get_codelist(codelist_name, version):
session = requests.Session()
session.mount('https://', http_adapter)
if version in _VERY_OLD_IATI_VERSIONS:
codelist_url = _VERY_OLD_CODELIST_TMPL.format(
codelist_name=codelist_name)
request = requests.get(codelist_url)
request = session.get(codelist_url)
codes = list(csv.DictReader(
[x.decode() for x in request.iter_lines()]))
version_codelist = {'data': codes}
elif version in _OLD_IATI_VERSIONS:
codelist_url = _OLD_CODELIST_TMPL.format(
codelist_name=codelist_name)
request = requests.get(codelist_url)
request = session.get(codelist_url)
codes = list(csv.DictReader(
[x.decode() for x in request.iter_lines()]))
version_codelist = {'data': codes}
else:
codelist_url = _NEW_CODELIST_TMPL.format(
codelist_name=codelist_name,
version=version.replace('.', ''))
version_codelist = requests.get(codelist_url).json()
version_codelist = session.get(codelist_url).json()
return version_codelist

path = join(CONFIG['paths']['standard'], 'codelists')
Expand Down Expand Up @@ -219,14 +235,16 @@ def get_codelist(codelist_name, version):


def schemas():
session = requests.Session()
session.mount('https://', http_adapter)
path = join(CONFIG['paths']['standard'], 'schemas')
shutil.rmtree(path, ignore_errors=True)
makedirs(path)

versions_url = 'https://iatistandard.org/reference_downloads/' + \
'201/codelists/downloads/clv2/json/en/' + \
'Version.json'
versions = [d['code'] for d in requests.get(versions_url).json()['data']]
versions = [d['code'] for d in session.get(versions_url).json()['data']]
versions.reverse()

logging.getLogger(__name__).info('Downloading IATI Standard schemas...')
Expand All @@ -239,7 +257,7 @@ def schemas():
makedirs(join(path, version_path))
for filename in filenames:
url = tmpl.format(version=version, filename=filename)
request = requests.get(url)
request = session.get(url)
filepath = join(path, version_path, filename)
with open(filepath, 'wb') as handler:
handler.write(request.content)
Expand Down
12 changes: 10 additions & 2 deletions iatikit/utils/helpers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import requests
from requests.adapters import HTTPAdapter
from urllib3 import Retry

# Transport adapter mounted on the session used by get_iati_versions().
# Retry(total=3) allows up to three retries per request.
http_adapter = HTTPAdapter(max_retries=Retry(total=3))


def get_iati_versions():
    """Return the version codes listed in the IATI ``Version`` codelist.

    Downloads ``Version.json`` from reference.iatistandard.org and collects
    the ``code`` of every entry under its ``data`` key.

    Returns:
        list: the ``code`` values, in the reverse of the order in which the
        codelist lists them.

    Raises:
        requests.RequestException: if the download still fails once the
            adapter's retries are exhausted.
        ValueError: if the response body cannot be parsed as JSON.
        KeyError: if the parsed document has no ``data`` key, or an entry
            has no ``code`` key.
    """
    versions_url = 'http://reference.iatistandard.org/201/codelists/' + \
        'downloads/clv2/json/en/Version.json'

    # NOTE: keep this a plain ``requests.Session()`` call rather than a
    # ``with`` block, so callers/tests that patch ``requests.Session`` and
    # configure ``return_value.get`` continue to work.
    session = requests.Session()
    # The URL above uses plain http, so the retrying adapter must be mounted
    # for that scheme too; mounting it only on 'https://' would leave the
    # initial request without retries.
    session.mount('http://', http_adapter)
    session.mount('https://', http_adapter)

    versions = [
        d['code']
        for d in session.get(versions_url).json()['data']
    ]
    versions.reverse()
    return versions
25 changes: 16 additions & 9 deletions tests/test_download_codelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import shutil
import tempfile
from unittest import TestCase

from mock import patch

from iatikit.utils import download
Expand All @@ -17,8 +16,10 @@ def setUp(self):
config_dict = {'paths': {'standard': self.standard_path}}
CONFIG.read_dict(config_dict)

@patch('requests.get', CodelistMockRequest)
def test_download_codelists(self):
@patch('requests.Session')
def test_download_codelists(self, mock_session):
mock_session.return_value.get.side_effect = CodelistMockRequest

download.codelists()

codelists_expected = {
Expand All @@ -34,8 +35,10 @@ def test_download_codelists(self):
codelists = json.load(handler)
assert codelists == codelists_expected

@patch('requests.get', CodelistMockRequest)
def test_download_codelist_from_until(self):
@patch('requests.Session')
def test_download_codelist_from_until(self, mock_session):
mock_session.return_value.get.side_effect = CodelistMockRequest

download.codelists()

path = join(self.standard_path, 'codelists', 'ActivityStatus.json')
Expand All @@ -47,8 +50,10 @@ def test_download_codelist_from_until(self):
assert vocabs['data']['1']['from'] == '1.01'
assert vocabs['data']['1']['until'] == '2.01'

@patch('requests.get', CodelistMockRequest)
def test_download_codelist_items(self):
@patch('requests.Session')
def test_download_codelist_items(self, mock_session):
mock_session.return_value.get.side_effect = CodelistMockRequest

download.codelists()

path = join(self.standard_path, 'codelists', 'Sector.json')
Expand All @@ -58,8 +63,10 @@ def test_download_codelist_items(self):
sector_name = 'Media and free flow of information'
assert vocabs['data']['15153']['name'] == sector_name

@patch('requests.get', CodelistMockRequest)
def test_download_codelist_mappings(self):
@patch('requests.Session')
def test_download_codelist_mappings(self, mock_session):
mock_session.return_value.get.side_effect = CodelistMockRequest

download.codelists()

path = join(self.standard_path, 'codelist_mappings')
Expand Down
6 changes: 4 additions & 2 deletions tests/test_download_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,10 @@ def setUp(self):
config_dict = {'paths': {'standard': self.standard_path}}
CONFIG.read_dict(config_dict)

@patch('requests.get', MockRequest)
def test_download_schemas(self):
@patch('requests.Session')
def test_download_schemas(self, mock_session):
mock_session.return_value.get.side_effect = MockRequest

download.schemas()

filenames = [
Expand Down

0 comments on commit e63b00c

Please sign in to comment.