Skip to content

Commit a3c7e1f

Browse files
authored
use constants, tidy
1 parent 7685d02 commit a3c7e1f

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

iiify/app.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,9 @@ def mainentry():
5757
def index():
5858
"""Lists all available book and image items on Archive.org"""
5959
q = request.args.get('q', '')
60+
cursor = request.args.get('cursor', '')
6061
fields = request.args.get('fields', '')
6162
sorts = request.args.get('sorts', '')
62-
cursor = request.args.get('cursor', '')
6363
return jsonify(getids(q, cursor=cursor, fields=fields, sorts=sorts))
6464

6565

iiify/resolver.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import xml.etree.ElementTree as ET
1313

1414
SCRAPE_API = 'https://archive.org/services/search/v1/scrape'
15-
ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php?'
15+
ADVANCED_SEARCH = 'https://archive.org/advancedsearch.php'
1616
IMG_CTX = 'http://iiif.io/api/image/2/context.json'
1717
PRZ_CTX = 'http://iiif.io/api/presentation/2/context.json'
1818
ARCHIVE = 'https://archive.org'
@@ -22,6 +22,9 @@
2222
bookreader = "http://%s/BookReader/BookReaderImages.php"
2323
URI_PRIFIX = "https://iiif.archive.org/iiif"
2424

25+
MAX_SCRAPE_LIMIT = 10_000
26+
MAX_API_LIMIT = 1_000
27+
2528
class MaxLimitException(Exception):
2629
pass
2730

@@ -35,7 +38,7 @@ def purify_domain(domain):
3538
domain = re.sub('^http:\/\/', "https://", domain)
3639
return domain if domain.endswith('/iiif/') else domain + 'iiif/'
3740

38-
def getids(q, limit=1000, cursor='', sorts='', fields=''):
41+
def getids(q, cursor='', sorts='', fields='', limit=MAX_API_LIMIT):
3942
query = "(mediatype:(texts) OR mediatype:(image))" + \
4043
((" AND %s" % q) if q else "")
4144
# 'all:1' also works
@@ -55,10 +58,9 @@ def scrape(query, fields="", sorts="", count=100, cursor="", security=True):
5558
if not query:
5659
raise ValueError("GET 'query' parameters required")
5760

58-
if int(count) > 1000 and security:
61+
if int(count) > MAX_API_LIMIT and security:
5962
raise MaxLimitException("Limit may not exceed 1000.")
6063

61-
#sorts = sorts or 'date+asc,createdate'
6264
fields = fields or 'identifier,title'
6365

6466
params = {
@@ -83,14 +85,13 @@ def search(query, page=1, limit=100, security=True, sort=None, fields=None):
8385
if int(limit) > 1000 and security:
8486
raise MaxLimitException("Limit may not exceed 1000.")
8587

86-
sort = sort or 'sort%5B%5D=date+asc&sort%5B%5D=createdate'
87-
fields = fields or 'identifier,title'
8888
return requests.get(
89-
ADVANCED_SEARCH + sort,
89+
ADVANCED_SEARCH,
9090
params={'q': query,
91+
'sort[]': sort or ['date asc', 'createdate'],
9192
'rows': limit,
9293
'page': page,
93-
'fl[]': fields,
94+
'fl[]': fields or 'identifier,title',
9495
'output': 'json',
9596
}).json()
9697

@@ -172,12 +173,12 @@ def create_collection3(identifier, domain, page=1, rows=1000):
172173

173174
addMetadata(collection, identifier, metadata['metadata'], collection=True)
174175

175-
asURL = f'https://archive.org/advancedsearch.php?q=collection%3A{identifier}&fl[]=identifier&fl[]=mediatype&fl[]=title&fl[]=description&sort[]=&sort[]=&sort[]=&rows={rows}&page={page}&output=json&save=yes'
176+
asURL = f'{ADVANCED_SEARCH}?q=collection%3A{identifier}&fl[]=identifier&fl[]=mediatype&fl[]=title&fl[]=description&sort[]=&sort[]=&sort[]=&rows={rows}&page={page}&output=json&save=yes'
176177
itemsSearch = requests.get(asURL).json()
177178
total = itemsSearch['response']['numFound']
178179
# There is a max of 10,000 items that can be retrieved from the advanced search
179-
if total > 10000:
180-
total = 10000
180+
if total > MAX_SCRAPE_LIMIT:
181+
total = MAX_SCRAPE_LIMIT
181182

182183
if len(itemsSearch['response']['docs']) == 0:
183184
return None

0 commit comments

Comments
 (0)