# litscan-retracted-articles.py
# Forked from RNAcentral/rnacentral-import-pipeline.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Copyright [2009-present] EMBL-European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import click
import json
import psycopg2
import psycopg2.extras
import requests
import time
@click.command()
@click.argument('database')
@click.argument('webhook')
def main(database, webhook):
    """
    Find LitScan articles that have been retracted and flag them in the database.

    Selects the pmcid of every litscan_article row whose ``retracted`` flag is
    not TRUE, queries the Europe PMC status-update-search endpoint for those
    ids in batches, sets ``retracted=TRUE`` for each article reported as
    RETRACTED, commits, and posts a summary to the webhook. If a database,
    HTTP or JSON-decoding error occurs, the error text is posted to the
    webhook instead and nothing is committed.

    :param database: params to connect to the db
    :param webhook: address to send message to slack channel
    :return: None
    """
    status_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/status-update-search"
    # check 1000 articles at a time
    step = 1000
    # seconds to wait on any single HTTP call; without a timeout a stalled
    # connection would block the job forever
    request_timeout = 60

    conn = None
    try:
        conn = psycopg2.connect(database)
        # the context manager closes the cursor even if a request fails
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cursor:
            # retrieve all articles identified by LitScan that are not flagged yet
            cursor.execute(""" SELECT pmcid FROM litscan_article WHERE retracted IS NOT TRUE """)
            articles = [row[0] for row in cursor.fetchall()]

            # list of articles that have been retracted
            retracted_articles = []

            for start in range(0, len(articles), step):
                # create json object for this batch
                obj = {
                    "ids": [
                        {"src": "PMC", "extId": pmcid}
                        for pmcid in articles[start:start + step]
                    ]
                }

                # use the Status Update Search module of the Europe PMC RESTful API
                response = requests.post(status_url, json=obj, timeout=request_timeout)
                # surface HTTP errors explicitly instead of trying to parse an error page
                response.raise_for_status()
                data = response.json()

                if "articlesWithStatusUpdate" in data:
                    for item in data["articlesWithStatusUpdate"]:
                        if "statusUpdates" in item and "RETRACTED" in item["statusUpdates"]:
                            # update article
                            cursor.execute(
                                "UPDATE litscan_article SET retracted=TRUE WHERE pmcid=%s",
                                (item["extId"],),
                            )
                            retracted_articles.append(item["extId"])

                # short pause between batches (presumably to avoid hammering the API)
                time.sleep(0.3)

        # Commit the changes to the database
        conn.commit()

        # send a message on Slack
        if retracted_articles:
            message = f'{len(retracted_articles)} {"articles have" if len(retracted_articles) > 1 else "article has"} ' \
                      f'been retracted: {", ".join(retracted_articles)}'
            requests.post(webhook, json.dumps({"text": message}), timeout=request_timeout)
    except (ValueError, psycopg2.DatabaseError, requests.RequestException) as error:
        # An exception instance is not JSON serializable, so send its text;
        # passing the object itself made json.dumps raise TypeError here.
        requests.post(webhook, json.dumps({"text": str(error)}), timeout=request_timeout)
    finally:
        # closing without a commit discards any uncommitted updates
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()