6
6
#Base URL under DBLP's "/pid/" path on the dblp.dagstuhl.de host.
#NOTE(review): presumably a person's PID is appended to this to fetch their record -- confirm against the code that uses it.
DBLP_PID_ENTRYPOINT = "https://dblp.dagstuhl.de/pid/"
7
7
8
8
class Publication:
    """A single publication parsed from one XML record plus its BibTeX entry.

    Attributes set by the constructor:
        title:      text of the record's first ``title`` element.
        year:       publication year as an int, or 0 when it is missing
                    or not numeric.
        key:        ``key`` attribute of the record's first child element.
        bibstr:     BibTeX string looked up via ``"DBLP:" + key``.
        authorPIDs: list of ``name`` attributes collected from the first
                    ``author`` element (see the review note in ``__init__``).
    """

    def __init__(self, obj, bibsByKey):
        """Build a publication from an XML record element.

        Args:
            obj: XML element for one publication record. Its first child
                must carry a ``key`` attribute, and it must contain a
                ``title`` element somewhere below it.
            bibsByKey: mapping from BibTeX citation key (including the
                ``"DBLP:"`` prefix) to the full BibTeX entry string.

        Raises:
            AttributeError: if the record has no ``title`` element.
            IndexError: if the record has no child elements.
            KeyError: if the first child has no ``key`` attribute, or if
                ``bibsByKey`` has no entry for ``"DBLP:" + key``.
        """
        self.title = obj.find(".//title").text  #Title acts as ID

        # Year is best-effort: a missing element (AttributeError), an empty
        # element (TypeError) or non-numeric text (ValueError) all map to 0.
        try:
            self.year = int(obj.find(".//year").text)
        except (AttributeError, TypeError, ValueError):
            self.year = 0

        #Key lives on the first child of the record, used to fetch the BibTeX string
        self.key = obj[0].attrib["key"]
        #BibTeX string for this publication
        self.bibstr = bibsByKey["DBLP:" + self.key]

        # NOTE(review): ``find`` returns only the FIRST ``author`` element and
        # the comprehension iterates over that element's children, reading
        # their ``name`` attribute. If the intent is "one entry per author",
        # this probably wants ``findall`` -- confirm against real records
        # before changing, since downstream code may rely on the current
        # result.
        try:
            self.authorPIDs = [o.attrib["name"] for o in obj.find(".//author")]
        except (TypeError, KeyError):
            # No ``author`` element (iterating None) or a child without a
            # ``name`` attribute. Editors are deliberately NOT used as a
            # fallback: editorships should not appear in the publication
            # list. The original author's note says publications without
            # authors get pruned later; that pruning is not visible here.
            self.authorPIDs = []

    def __eq__(self, other):
        """Two publications are equal when their titles match."""
        return self.title == other.title
@@ -42,7 +45,11 @@ def __init__(self, obj) -> None:
42
45
xmlTree = ET .fromstring (rawXML )
43
46
xmlPubs = xmlTree .findall (".//r" ) #This has to work since being in DBLP means having at least one publication
44
47
assert len (xmlPubs ) == len (bibs ), "Number of publications in XML and BIB file do not match"
45
- self .pubs = [Publication (x , b ) for x , b in zip (xmlPubs , bibs )]
48
+
49
+ #The key is after the first { to the first , in the bib entry
50
+ bibsByKey = {bib .split ("{" )[1 ].split ("," )[0 ]: bib for bib in bibs }
51
+
52
+ self .pubs = [Publication (x , bibsByKey ) for x in xmlPubs ]
46
53
47
54
def pubsByYear (self ):
48
55
"""Returns a list of publications, partially sorted by year."""
@@ -66,10 +73,10 @@ def __hash__(self):
66
73
pubAuthors = {} #Maps a publication title to its RAIR affiliated authors
67
74
for m in members :
68
75
for p in m .pubs :
69
- if p .title in pubAuthors :
70
- pubAuthors [p .title ][1 ].append (m )
76
+ if p .key in pubAuthors :
77
+ pubAuthors [p .key ][1 ].append (m )
71
78
else :
72
- pubAuthors [p .title ] = [p , [m ]]
79
+ pubAuthors [p .key ] = [p , [m ]]
73
80
74
81
#Prunes members from a papers author list if they are not a "current member".
75
82
#Being a current member at time of publication is defined as 8 years away from a member's first publication
@@ -87,8 +94,8 @@ def __hash__(self):
87
94
for p in m .pubs :
88
95
if p .year - firstSelmerPub .year > 8 :
89
96
break
90
- if p .title in pubAuthors :
91
- pubAuthors [p .title ][1 ].remove (m )
97
+ if p .key in pubAuthors :
98
+ pubAuthors [p .key ][1 ].remove (m )
92
99
93
100
#Prunes publications with less than 2 current members as authors
94
101
pubAuthors2 = {k : v for k , v in pubAuthors .items () if len (v [1 ]) > 1 }
0 commit comments