This repository has been archived by the owner on Jul 1, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdoi-crossref-ia.py
70 lines (65 loc) · 2.58 KB
/
doi-crossref-ia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/python
# -*- coding: utf-8 -*-
""" Bot to upload PDFs for a list of DOIs to the Internet Archive. """
#
# (C) Federico Leva, 2018
#
# Distributed under the terms of the MIT license.
#
__version__ = '0.1.0'
from internetarchive import upload
import os
# quote_plus lives in different modules on Python 2 and Python 3.
# Catch only ImportError so that unrelated failures (and Ctrl-C) are not
# silently swallowed by the fallback.
try:
    from urllib import quote_plus  # Python 2
except ImportError:
    from urllib.parse import quote_plus  # Python 3
import re
import requests
# Module-wide HTTP session; upload_doi() uses it for the Crossref API request.
s = requests.Session()
import threading
from time import sleep
# Maps the language code of a Crossref record to a language name.
# upload_doi() looks codes up here and passes unknown codes through unchanged.
convertlang = {
    'ar': 'Arabic',
    # Kept for backward compatibility with the original table; note that the
    # ISO 639-1 code 'ch' actually denotes Chamorro, not Chinese.
    'ch': 'Chinese',
    'de': 'German',
    'en': 'English',
    'es': 'Spanish',
    'fr': 'French',
    'it': 'Italian',
    'ja': 'Japanese',
    'nl': 'Dutch',
    'pl': 'Polish',
    'pt': 'Portuguese',
    'ru': 'Russian',
    # ISO 639-1 code for Chinese.
    'zh': 'Chinese',
}
def worker(doi=None):
    """Upload one DOI on a best-effort basis.

    Calls upload_doi() and never propagates an ordinary error, which makes
    it usable as a thread target: a failing DOI is logged with its traceback
    instead of being dropped silently.

    Args:
        doi: The DOI to upload.

    Returns:
        None.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        upload_doi(doi)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still stop the program.
        logging.exception("Upload failed for DOI %s", doi)
def _crossref_date(message):
    """Return the publication date of a Crossref record as 'YYYY[-MM[-DD]]'.

    The print date is preferred; the online and issued dates are fallbacks.
    Returns '' when none of them carries usable date parts.
    """
    for field in ('published-print', 'published-online', 'issued'):
        date_parts = (message.get(field) or {}).get('date-parts') or []
        first = date_parts[0] if date_parts else []
        parts = [part for part in first if part is not None]
        if parts:
            return '-'.join(str(part).zfill(2) for part in parts)
    return ''


def _crossref_creators(message):
    """Return the authors of a Crossref record joined as 'Given Family; ...'."""
    names = []
    for author in message.get('author') or []:
        # Join only the parts that exist so a missing given/family name does
        # not leave a stray leading or trailing space.
        personal = ' '.join(
            part for part in (author.get('given'), author.get('family')) if part
        )
        # Fall back to a plain 'name' entry when there is no personal name.
        name = personal or author.get('name') or ''
        if name:
            names.append(name)
    return '; '.join(names)


def upload_doi(doi=None, mailto=None):
    """Upload the local PDF of one DOI to the Internet Archive.

    Fetches the record of the DOI from the Crossref REST API, converts it
    into item metadata and uploads the file ``quote_plus(doi) + '.pdf'``
    (relative to the current directory) under the name
    ``<identifier>.pdf``, where the identifier is ``paper-doi-`` followed by
    the DOI with every character outside ``[-_A-Za-z0-9]`` replaced by ``_``
    (truncated to 89 characters).

    Args:
        doi: The DOI to look up and upload.
        mailto: Optional contact e-mail address, sent to Crossref as the
            ``mailto`` query parameter. Omitted from the request when empty.

    Returns:
        The value returned by ``internetarchive.upload``.

    Raises:
        ValueError: If ``doi`` is empty.
        requests.HTTPError: If Crossref answers with an error status.
    """
    if not doi:
        raise ValueError('A DOI is required')

    # The contact address belongs in the query string, never in the path:
    # anything appended to the DOI changes which work is requested.
    params = {'mailto': mailto} if mailto else None
    response = s.get(
        'https://api.crossref.org/works/{}'.format(doi),
        params=params,
        timeout=60,
    )
    # Fail with a clear HTTP error instead of a JSON decoding error.
    response.raise_for_status()
    m = response.json()['message']

    md = {
        "collection": "opensource",
        "licenseurl": "https://creativecommons.org/publicdomain/mark/1.0/",
        "mediatype": "texts",
        "subject": "journals",
        "identifier-doi": doi,
        "external-identifier": [doi] + (m.get('alternative-id') or []),
        "originalurl": "https://doi.org/{}".format(doi),
        "source": "https://api.crossref.org/works/{}".format(doi),
        "article-type": m.get('type'),
        "creator": _crossref_creators(m),
        "date": _crossref_date(m),
        "description": (m.get('abstract') or '') + '<hr>\nThis paper is in the public domain in USA. Metadata comes from the CrossRef API, see full record in the source URL below.',
        "isbn": "; ".join(m.get('ISBN') or []),
        "issn": "; ".join(m.get('ISSN') or []),
        "journalabbrv": m.get('short-container-title'),
        "journaltitle": ' '.join(m.get('container-title') or []),
        # Unknown language codes are passed through unchanged.
        "language": convertlang.get(m.get('language'), m.get('language')),
        "pagerange": m.get('page'),
        "publisher": m.get('publisher'),
        "publisher_location": m.get('publisher-location'),
        # Records without a title fall back to the DOI instead of crashing.
        "title": (m.get('title') or [doi])[0],
        # Volume and issue are distinct Crossref fields; keep them apart.
        "volume": m.get('volume'),
        "issue": m.get('issue'),
    }
    identifier = 'paper-doi-' + re.sub(r'[^-_A-Za-z0-9]', '_', doi)[:89]
    return upload(
        identifier,
        files={identifier + '.pdf': quote_plus(doi) + '.pdf'},
        metadata=md,
    )
if __name__ == '__main__':
    # dois.txt holds one DOI per line. The context manager closes the file
    # even when an upload raises.
    with open('dois.txt', 'r') as dois:
        for line in dois:
            doi = line.strip()
            if not doi:
                # Blank lines would otherwise be processed as an empty DOI.
                continue
            identifier = 'paper-doi-' + re.sub(r'[^-_A-Za-z0-9]', '_', doi)[:89]
            source_pdf = '{}_text.pdf'.format(identifier)
            target_pdf = '{}.pdf'.format(quote_plus(doi))
            # Rename <identifier>_text.pdf to the file name upload_doi()
            # reads. A missing source is tolerated (as with the former
            # `mv` call), e.g. when the file was renamed by an earlier run.
            if os.path.exists(source_pdf):
                os.rename(source_pdf, target_pdf)
            upload_doi(doi)
            # Pause between uploads.
            sleep(1)
            # NOTE: for parallel uploads, worker(doi) can instead be started
            # in a threading.Thread with a pause between thread starts.