-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPubMedArticleModule.py
73 lines (60 loc) · 2.81 KB
/
PubMedArticleModule.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from Bio import Entrez
from Bio import Medline
import json
# Template for a single article record as it is serialized to JSON. Every field
# starts out as a placeholder string; the key order here is the order in which
# the fields appear in the output: article ID, title, authors, source, journal,
# volume, abstract.
jsonEntry = dict(
    articleID="placeholder ID",
    title="placeholder title",
    authors="placeholder authors",
    source="placeholder source",
    journal="placeholder journal",
    volume="placeholder volume",
    abstract="placeholder abstract",
)
def resetJSONEntry():
    """Put every field of the module-level ``jsonEntry`` dict back to its placeholder text."""
    # update() overwrites the values on the existing dict object, so any other
    # reference to jsonEntry observes the reset as well.
    jsonEntry.update(
        articleID="placeholder ID",
        title="placeholder title",
        authors="placeholder authors",
        source="placeholder source",
        journal="placeholder journal",
        volume="placeholder volume",
        abstract="placeholder abstract",
    )
########## front end values
# Search query sent to PubMed. Front-end integration point: the front end's
# search query should set this variable.
searchTerm = "cancer care AND human"
# Maximum number of articles requested from PubMed (passed as esearch retmax).
searchAmount = 5
# File the article records are written to.
outputFile = "List_Of_PubMed_Articles.json"
##########

# Truncate the output file before searching so that a failed run leaves an
# empty file behind instead of the results of an earlier run.
with open(outputFile, "w", encoding="utf-8") as clearFile:
    clearFile.write("")

print("Searching for " + str(searchAmount) + " PubMed articles with keywords <" + searchTerm + ">: ")

# NOTE(review): this address looks like a redacted placeholder; NCBI expects a
# reachable contact email here -- confirm the real value before deploying.
Entrez.email = "[email protected]"

# Step 1: search PubMed for the IDs of articles matching the keywords.
streamE = Entrez.esearch(db="pubmed", term=searchTerm, retmax=searchAmount)
try:
    record = Entrez.read(streamE)
finally:
    # Release the handle even when the response cannot be parsed.
    streamE.close()
idlist = record["IdList"]  # article IDs used for the detail lookup below

# Output field name -> MEDLINE tag it is read from. A tag that is missing from
# a record is written as "?".
medlineFields = (
    ("title", "TI"),
    ("authors", "AU"),
    ("source", "SO"),
    ("journal", "JT"),
    ("volume", "VI"),
    ("abstract", "AB"),
)

# Step 2: fetch the details of every article found and append one JSON object
# per article to the output file.
# NOTE: the file therefore holds pretty-printed JSON objects written back to
# back (one per article), not a single JSON array.
articlesWritten = 0
if idlist:  # an empty search result must not be sent to efetch
    streamM = Entrez.efetch(db="pubmed", id=idlist, rettype="medline", retmode="text")
    try:
        records = Medline.parse(streamM)
        with open(outputFile, "a", encoding="utf-8") as file:
            for ID, article in zip(idlist, records):
                # Work on a copy of the template so the shared jsonEntry dict
                # is never left half-filled if something below raises.
                entry = dict(jsonEntry)
                # Prefer the ID stated in the record itself; fall back to the
                # positionally matching search ID when the tag is absent.
                entry["articleID"] = article.get("PMID", ID)
                for fieldName, medlineTag in medlineFields:
                    entry[fieldName] = article.get(medlineTag, "?")
                file.write(json.dumps(entry, indent=4))
                file.write("\n")
                articlesWritten += 1
    finally:
        # Close the fetch handle even when parsing or writing fails.
        streamM.close()

##########
#front end integrated by reading the output json file from this program; file is List_Of_PubMed_Articles.json
##########

# Report the number of articles actually written, which can be lower than the
# number requested.
if articlesWritten:
    print("Successfully found " + str(articlesWritten) + " PubMed articles; written article IDs, titles, authors, sources, and abstracts to " + outputFile)
else:
    print("No PubMed articles found for keywords <" + searchTerm + ">; " + outputFile + " was left empty")