Skip to content

Commit

Permalink
crossref: fix query params and request retries
Browse files Browse the repository at this point in the history
  • Loading branch information
slint committed Aug 7, 2019
1 parent 34f0ff1 commit 433f1b4
Showing 1 changed file with 24 additions and 6 deletions.
30 changes: 24 additions & 6 deletions asclepias_broker/harvester/crossref.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
from ..events.api import EventAPI
from ..utils import chunks
from .proxies import current_harvester
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry


class CrossrefAPIException(Exception):
Expand Down Expand Up @@ -53,9 +55,25 @@ def __init__(self, *, id: str = None, base_url: str = None,
self.id = id
self.base_url = base_url or self.DEFAULT_API_BASE_URL
self.params = params or {}

def _transform_scholix(self, data):
"""."""
self._session = None

@property
def session(self):
    """Return the HTTP session used for API requests, creating it once.

    On first access a ``requests.Session`` is built and an ``HTTPAdapter``
    carrying a ``Retry`` policy (``total=5, read=5, connect=5``, backoff
    factor ``0.3``, forced retry on HTTP 500, 502 and 504) is mounted for
    both the ``http://`` and ``https://`` prefixes. The session is stored
    on ``self._session`` and returned unchanged on later accesses.
    """
    # Already built on a previous access: reuse it.
    if self._session is not None:
        return self._session

    http_session = requests.Session()
    retry_policy = Retry(
        total=5,
        read=5,
        connect=5,
        backoff_factor=0.3,
        status_forcelist=(500, 502, 504),
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)
    # The same adapter instance serves both plain and TLS connections.
    for url_prefix in ('http://', 'https://'):
        http_session.mount(url_prefix, retrying_adapter)

    self._session = http_session
    return self._session

def _clean_scholix(self, data):
data.pop('Url', None)
for k in ('Source', 'Target'):
t = data[k]['Type']
Expand Down Expand Up @@ -89,14 +107,14 @@ def search_events(self, *, scholix: bool = True) -> Iterator[dict]:
raise CrossrefAPIParametersException()

while True:
resp = requests.get(url, params=params)
resp = self.session.get(url, params=params)
if not resp.ok or resp.json().get('status') != 'ok':
raise CrossrefAPIException()
payload = resp.json()
items = payload.get('message', {}).get(
'link-packages' if scholix else 'events', [])
for item in items:
yield self._transform_scholix(item) if scholix else item
yield self._clean_scholix(item) if scholix else item

cursor_id = payload.get('message', {}).get('next-cursor')
if cursor_id:
Expand All @@ -110,7 +128,7 @@ def harvest(self, eager: bool = False, no_index: bool = True):
current_datetime = datetime.now()
if last_run:
self.params.setdefault(
'from-update-date', last_run.date().isoformat())
'from-occurred-date', last_run.date().isoformat())

results = self.search_events()
for events in chunks(results, 100):
Expand Down

0 comments on commit 433f1b4

Please sign in to comment.