-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathSpringerCsv2Bib.py
120 lines (97 loc) · 3.86 KB
/
SpringerCsv2Bib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import sys
sys.path.insert(0, './pybtex/')
from pybtex.database import parse_file, parse_string
from pybtex.database import BibliographyData, Entry, Person
import io
import csv
import pandas as pd
import argparse
import unidecode
from shutil import copyfile
import tempfile
#=============================================================
def TypePaperSelect(type_tmp):
    """Return the BibTeX entry type to use for a value of the CSV `type` column.

    'Article' maps to 'article'. 'Chapter' and every other value (including
    a missing cell) fall back to 'InProceedings'.

    The previous version computed this mapping but then returned the raw
    input, so the mapping never took effect.
    """
    if type_tmp == 'Article':
        return 'article'
    # 'Chapter' and anything unrecognised share the default entry type.
    return 'InProceedings'
#=============================================================
def AuthorFix(author_tmp):
    """Turn a Springer CSV author cell into a BibTeX "A and B and C" string.

    The CSV fuses consecutive authors together and may append academic
    titles to a name, for example:

        "Sergey Ablameyko PhD, DSc, Prof, FIEE, FIAPR, SMIEEETony Pridmore BSc, PhD"
            -> "Sergey Ablameyko and Tony Pridmore"
        "Yingying ZhuCong YaoXiang Bai"
            -> "Yingying Zhu and Cong Yao and Xiang Bai"

    Two kinds of author boundary are recognised:
      * a known title (PhD, DSc, BSc, Prof, FIEE, FIAPR, SMIEEE), which is
        removed and treated as the end of the current author;
      * a lowercase letter immediately followed by an uppercase letter.

    Known limitation: the lowercase/uppercase rule also splits names with an
    internal capital such as "McDonald".

    Parameters:
        author_tmp: raw author text from the CSV (a str).

    Returns:
        The author names separated by " and ", with commas removed and
        whitespace collapsed. An empty input gives an empty string.
    """
    import re  # function-scope import: `re` is not in the file's import block

    # A title only counts when it is not the start of a longer word, so a
    # surname like "Profeta" is left intact. Matching is case-sensitive.
    title_pattern = r"(?:SMIEEE|FIAPR|FIEE|Prof|PhD|DSc|BSc)(?![a-z])"

    # Commas are dropped, as in the original handling.
    text = author_tmp.replace(",", " ")

    authors = []
    # Splitting on titles (rather than deleting them) keeps the boundary in
    # "...SMIEEETony ...", where the title is glued to the next author.
    for chunk in re.split(title_pattern, text):
        pieces = []
        prev_is_lower = False
        for ch in chunk:
            # lowercase followed directly by uppercase marks two fused names
            if prev_is_lower and ch.isalpha() and ch.isupper():
                pieces.append(" and ")
            pieces.append(ch)
            prev_is_lower = ch.isalpha() and ch.islower()
        # collapse runs of whitespace and trim the ends
        cleaned = " ".join("".join(pieces).split())
        if cleaned:
            authors.append(cleaned)
    return " and ".join(authors)
#=============================================================
def _cellToText(value):
    """Render a CSV cell as text, without the ".0" of whole-number floats.

    pandas stores a numeric column that contains missing cells as floats, so
    a year such as 2015 would otherwise be written as "2015.0".
    """
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def run(csvFileName, bibFileName):
    """Convert a Springer CSV export into a BibTeX file.

    Each CSV row becomes one entry keyed by its DOI. Rows without an author
    are counted and left out. Rows without a DOI get the fallback key
    "row<N>" (N is the 1-based row number) instead of aborting the run.

    Parameters:
        csvFileName: path of the CSV file to read; its first line is skipped
            as a header.
        bibFileName: path of the BibTeX file to write.

    Returns:
        None. If csvFileName does not exist a message is printed and nothing
        is written.
    """
    if not os.path.isfile(csvFileName):
        print("File not found: ",csvFileName)
        return

    # The original author reported that pandas failed on some input paths,
    # so the CSV is read from a temporary copy. mkstemp creates the file
    # safely (mktemp is race-prone), and the copy is removed once loaded.
    tmpHandle, tmpFile = tempfile.mkstemp()
    os.close(tmpHandle)
    try:
        copyfile(csvFileName,tmpFile)
        colnames = ['title','journal','book','volume','issue','doi','author','year','url','type']
        pn = pd.read_csv(tmpFile, names=colnames, skiprows=1)
    finally:
        os.remove(tmpFile)

    # (column, converter) pairs in the order the fields are emitted;
    # a converter of None means the cell is used as-is.
    fieldSpecs = [
        ('title', None),
        ('journal', None),
        ('volume', _cellToText),
        ('issue', _cellToText),
        ('doi', None),
        ('year', _cellToText),
        ('url', None),
        ('author', AuthorFix),
    ]

    bibData = BibliographyData()
    total = 0
    notAuthor = 0
    for row_index, row in pn.iterrows():
        total = total + 1

        # Each field is guarded by its own column (the issue field used to
        # be guarded by the volume column).
        fields = []
        for name, convert in fieldSpecs:
            value = row[name]
            if pd.isnull(value):
                continue
            fields.append((name, value if convert is None else convert(value)))

        # A missing DOI is NaN (a float) and cannot be used as a key.
        if pd.isnull(row['doi']):
            keyPaper = "row" + str(total)
        else:
            keyPaper = row['doi']
        typePaper = TypePaperSelect(row['type'])
        print("Chave "+keyPaper+" \r", end="", flush=True)

        if pd.isnull(row['author']):
            notAuthor = notAuthor + 1
        else:
            bibData.entries[keyPaper] = Entry(typePaper, fields)

    print("Processed ",total," ")
    print("Removed without author ", notAuthor)
    print("Total Final",len(bibData.entries))
    bibData.to_file(bibFileName)
    print("Saved file ",bibFileName)
#=============================================================================
def main():
    """Parse the command-line arguments and run the CSV to BibTeX conversion."""
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-c", "--csvFileName", required=True, help="CSV file name")
    ap.add_argument("-b", "--bibFileName", required=True, help="BibText file name")
    args = vars(ap.parse_args())
    run(args["csvFileName"], args["bibFileName"])


# Only run when executed as a script, so importing this module does not
# parse sys.argv or start a conversion.
if __name__ == "__main__":
    main()