-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetPATSTAT.py
124 lines (110 loc) · 4.33 KB
/
getPATSTAT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""
Created on Feb 24 2021
Updated on Oct 2022
Authors: Jeronimo Arenas Garcia <[email protected]>
José Antonio Espinosa Melchor <[email protected]>
Download PATSTAT products using REST API
"""
import argparse
import configparser
import io
import json
import shutil
import sys
import zipfile
from pathlib import Path

import requests
from dateutil import parser
def print_status(act_msg, err_msg, resp):
    """Report the outcome of an HTTP request and abort the script on failure.

    The action label and the response object are always printed. When the
    status code is anything other than 200, the error message is printed,
    followed by the response body pretty-printed as JSON if it can be parsed
    as such, and the process is terminated.

    Args:
        act_msg: Short label of the action that produced ``resp``.
        err_msg: Message printed when the status code is not 200.
        resp: Response object exposing ``status_code`` and ``content``.

    Raises:
        SystemExit: With exit status 1 when ``resp.status_code`` is not 200.
    """
    print(f"{act_msg}: {resp}")
    if resp.status_code == 200:
        return

    print(err_msg)
    try:
        print(json.dumps(json.loads(resp.content.decode()), indent=2))
    except (ValueError, AttributeError):
        # Best effort only: the body is missing, is not valid text, or is not
        # JSON (JSONDecodeError and UnicodeDecodeError are both ValueErrors).
        pass
    # Non-zero status so that callers and shell pipelines can detect the failure.
    sys.exit(1)
if __name__ == "__main__":
    # Script entry point: authenticate, locate the newest PATSTAT Global
    # delivery and extract all of its zip files into a per-edition folder.

    # Command-line arguments
    arg_parser = argparse.ArgumentParser(description="Download PATSTAT products using REST API")
    arg_parser.add_argument("-c", "--config", help="Configuration file to use", default="config.cf")
    arg_parser.add_argument("-p", "--path", help="Path where the datasets will be downloaded")
    args = arg_parser.parse_args()

    # Make sure a valid configuration file is available
    if not Path(args.config).is_file():
        print("Please provide a valid configuration file")
        sys.exit(1)
    config = configparser.ConfigParser()
    config.read(args.config)

    # Load credentials and destination path; --path overrides the config file.
    try:
        username = config["creds"]["user"]
        password = config["creds"]["pass"]
        download_path = Path(args.path or config["data"]["path"])
    except KeyError as err:
        # Report the missing section/option instead of dumping a traceback.
        print(f"Missing entry in configuration file: {err}")
        sys.exit(1)
    if not download_path.is_dir():
        print("Please provide a link to a folder for the download")
        sys.exit(1)

    # Create session
    with requests.Session() as s:
        # Get token using the password grant
        credentials = {
            "username": username,
            "password": password,
            "grant_type": "password",
            "scope": "openid",
        }
        token_reply = s.post(
            "https://login.epo.org/oauth2/aus3up3nz0N133c0V417/v1/token",
            headers={
                "Content-Type": "application/x-www-form-urlencoded",
                "Authorization": "Basic MG9hM3VwZG43YW41cE1JOE80MTc=",
            },
            data=credentials,
        )
        print_status("Get token", "Invalid credentials", token_reply)
        token_response = json.loads(token_reply.content.decode())
        token_type = token_response["token_type"]
        access_token = token_response["access_token"]
        # Same header for every authenticated request below.
        auth_headers = {"Authorization": f"{token_type} {access_token}"}

        # Get products
        base_uri = "https://publication-bdds.apps.epo.org/bdds/bdds-bff-service/prod/api/products/"
        r = s.get(base_uri, headers=auth_headers)
        print_status("Get products", "Invalid subscription products", r)
        products = json.loads(r.content)

        # Pick the first product whose name mentions PATSTAT Global
        product = next((item for item in products if "PATSTAT Global" in item["name"]), None)
        if product is None:
            print("These credentials do not allow access to PATSTAT Global")
            sys.exit(1)
        product_id = product["id"]

        r = s.get(f"{base_uri}{product_id}", headers=auth_headers)
        print_status("Get product", "Invalid subscription product", r)
        data_dict = json.loads(r.content)

        # Filter most recent delivery by publication date
        deliveries = data_dict["deliveries"]
        if not deliveries:
            print("No deliveries are available for PATSTAT Global")
            sys.exit(1)
        most_recent = max(
            deliveries,
            key=lambda delivery: parser.parse(delivery["deliveryPublicationDatetime"]),
        )
        delivery_id = most_recent["deliveryId"]

        # The edition folder is named after the last two words of the delivery name
        edition = "_".join(most_recent["deliveryName"].split()[-2:])
        version_path = download_path.joinpath(edition)
        if version_path.exists():
            # Not an error: nothing to do, so the default exit status is kept.
            print("This version has already been downloaded.")
            print("Closing...")
            sys.exit()

        # Download
        version_path.mkdir(parents=True)
        try:
            for file_info in most_recent["files"]:
                # Save and extract files
                file_id = file_info["fileId"]
                print(f'Downloading: {file_info["fileName"]}')
                print(file_info)
                r = s.get(
                    f"{base_uri}{product_id}/delivery/{delivery_id}/file/{file_id}/download",
                    headers=auth_headers,
                )
                # Stop on an HTTP error instead of handing an error body to zipfile.
                print_status("Download file", "Download failed", r)
                # NOTE: r.content holds the complete archive in memory before extraction.
                with zipfile.ZipFile(io.BytesIO(r.content)) as archive:
                    archive.extractall(version_path)
        except BaseException:
            # A leftover partial folder would make the next run report this
            # edition as already downloaded, so remove it before propagating
            # the failure (this also covers SystemExit and Ctrl-C).
            shutil.rmtree(version_path, ignore_errors=True)
            raise