1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Thu Mar 14 20:43:21 2019
5
+
6
+ @author: Zargham
7
+ """
8
+
9
+ import networkx as nx
10
+ import pandas as pd
11
+ import numpy as np
12
+
13
# Defaults.
# Un-normalised weight given to every node's implicit self-loop; used as the
# default `self_loop_wt` of wt_heuristic and pageRanker.
default_self_loop_wt = 0.001
15
+
16
def update_score(g, alpha, seed, lazy=False, lazy_wt=.5):
    """Advance every node's 'score' attribute by one iteration, in place.

    For each node the new value is ``alpha * seed[n]`` plus ``(1 - alpha)``
    times the score flowing in over the node's self-loop and over every
    parallel edge it shares with its out- and in-neighbours.  Each
    contribution is divided by the contributing node's 'total_wt'.  The
    result is finally blended with the node's previous score using the
    lazy weight.

    The graph must already carry the node attributes 'score', 'self_wt',
    'total_wt', 'in_nbr' and 'out_nbr' and the edge attributes 'in_weight'
    and 'out_weight'.

    Args:
        g: graph whose edges are addressable as ``g.edges[(u, v, key)]``.
        alpha: weight of the seed term; ``1 - alpha`` scales the walk term.
        seed: mapping from node to seed value.
        lazy: when falsy the lazy weight is forced to zero.
        lazy_wt: share of the previous score kept when ``lazy`` is truthy.

    Returns:
        The same graph object ``g`` with updated 'score' attributes.
    """
    # Lazy random walk: a topology-independent share stays on the node.
    # Multiplying by float(lazy) switches that share off when lazy is False.
    hold = lazy_wt * float(lazy)

    # Snapshot of the previous iterate so in-place writes below do not leak
    # into the contributions of nodes visited later in the loop.
    previous = nx.get_node_attributes(g, 'score')

    # (neighbour-set attribute, edge weight attribute, is the edge outbound?)
    directions = (
        ('out_nbr', 'out_weight', True),
        ('in_nbr', 'in_weight', False),
    )

    for node in g.nodes:
        loop_share = g.nodes[node]['self_wt'] / g.nodes[node]['total_wt']
        acc = (1 - alpha) * loop_share * previous[node] + alpha * seed[node]

        for nbr_attr, wt_attr, outbound in directions:
            for peer in g.nodes[node][nbr_attr]:
                if outbound:
                    tail, head = node, peer
                else:
                    tail, head = peer, node
                # Walk every parallel edge tail -> head (keys 0, 1, 2, ...).
                for key in range(edge_count(g, tail, head)):
                    share = (g.edges[(tail, head, key)][wt_attr]
                             / g.nodes[peer]['total_wt'])
                    acc = acc + (1 - alpha) * share * previous[peer]

        g.nodes[node]['score'] = hold * previous[node] + (1 - hold) * acc

    return g
45
+
46
+ #helper function
47
def edge_count(g, src, dst):
    """Count the parallel edges from ``src`` to ``dst`` keyed 0, 1, 2, ...

    Probes ``g.edges[(src, dst, i)]`` for consecutive integer keys starting
    at 0 and stops at the first key that is missing, so the result is the
    length of the contiguous run of keys beginning at 0.

    Args:
        g: object whose ``edges`` attribute supports ``(src, dst, key)``
            lookup and raises ``KeyError`` for an absent edge.
        src: source node of the edges to count.
        dst: destination node of the edges to count.

    Returns:
        The number of consecutive edge keys found (0 when there is none).

    Only ``KeyError`` is interpreted as "no such edge"; any other exception
    (for example from a graph type that does not use edge keys) propagates
    instead of being silently reported as a count.
    """
    i = 0
    while True:
        try:
            g.edges[(src, dst, i)]
        except KeyError:
            return i
        i += 1
57
+
58
# Default edge multipliers keyed by edge type.
# Each value is a (to_weight, from_weight) pair: wt_heuristic scales element 0
# by the destination node's weight to get 'in_weight' and element 1 by the
# source node's weight to get 'out_weight'.
default_edge_wt_by_type = {
    'github/authors': (0.5, 1),
    'github/hasParent': (1, 0.25),
    'git/hasParent': (1, 0.25),
    'github/mentionsAuthor': (1, 0.03125),
    'github/mergedAs': (0.5, 1),
    'github/references': (1, 0.0625),
    'github/reactsHeart': (2, 0.03125),
    'github/reactsHooray': (4, 0.03125),
    # appears to be missing from current implementation
    'github/reactsRocket': (1, 0),
    'github/reactsThumbsUp': (1, 0.03125),
}
71
+
72
# Default node weights keyed by node type; wt_heuristic multiplies these into
# the per-edge 'in_weight' / 'out_weight' values.
default_node_wt_by_type = {
    'github/issue': 2.0,
    'github/repo': 4.0,
    'github/comment': 1.0,
    'git/commit': 2.0,
    'github/user': 1.0,
    'github/bot': 1.0,
    'github/review': 1.0,
    'github/pull': 4.0,
}
82
+
83
+
84
def wt_heuristic(g,
                 node_wt_by_type=default_node_wt_by_type,
                 edge_wt_by_type=default_edge_wt_by_type,
                 self_loop_wt=default_self_loop_wt):
    """Annotate ``g`` in place with the weights and neighbourhoods used for scoring.

    Writes the following attributes:

    * on every edge: 'in_weight' (edge-type element 0 times the destination
      node's type weight) and 'out_weight' (edge-type element 1 times the
      source node's type weight);
    * on every node: 'all_nbr', 'in_nbr' and 'out_nbr' (sets of neighbouring
      nodes), 'self_wt' (the self-loop weight) and 'total_wt' (self-loop
      weight plus the 'in_weight' of each outgoing edge and the 'out_weight'
      of each incoming edge).

    Args:
        g: graph whose nodes and edges carry a 'type' attribute and whose
            edges are addressable as ``g.edges[(u, v, key)]``.
        node_wt_by_type: mapping from node type to node weight.
        edge_wt_by_type: mapping from edge type to a 2-element weight pair.
        self_loop_wt: un-normalised weight of each node's self-loop.

    Returns:
        The same graph object ``g``, mutated.

    Raises:
        KeyError: if a node or edge type is absent from the weight mappings.
    """
    # Directional weight of every edge: the type-dependent multiplier scaled
    # by the weight of the node at the corresponding end.
    for e in g.edges:
        e_wts = edge_wt_by_type[g.edges[e]['type']]
        src_wt = node_wt_by_type[g.nodes[e[0]]['type']]
        dst_wt = node_wt_by_type[g.nodes[e[1]]['type']]

        g.edges[e]['in_weight'] = e_wts[0] * dst_wt
        g.edges[e]['out_weight'] = e_wts[1] * src_wt

    # Create neighbourhoods.  A neighbour can be in both sets when edges run
    # in both directions.
    for n in g.nodes:
        g.nodes[n]['all_nbr'] = set(nx.all_neighbors(g, n))
        g.nodes[n]['in_nbr'] = set()
        g.nodes[n]['out_nbr'] = set()
        for nb in g.nodes[n]['all_nbr']:
            # An edge with key 0 marks the direction as present; only a
            # missing edge (KeyError) is treated as "no edge this way".
            try:
                g.edges[(nb, n, 0)]
            except KeyError:
                pass
            else:
                g.nodes[n]['in_nbr'].add(nb)

            try:
                g.edges[(n, nb, 0)]
            except KeyError:
                pass
            else:
                g.nodes[n]['out_nbr'].add(nb)

    # Per-node normalisation constant: self-loop plus all incident edges.
    for n in g.nodes:
        g.nodes[n]['self_wt'] = self_loop_wt
        total_wt = self_loop_wt

        for nb in g.nodes[n]['out_nbr']:
            # Outbound neighbour: every parallel edge n -> nb.
            for e3 in range(edge_count(g, n, nb)):
                total_wt = total_wt + g.edges[(n, nb, e3)]['in_weight']

        for nb in g.nodes[n]['in_nbr']:
            # Inbound neighbour: every parallel edge nb -> n.
            for e3 in range(edge_count(g, nb, n)):
                total_wt = total_wt + g.edges[(nb, n, e3)]['out_weight']

        g.nodes[n]['total_wt'] = total_wt

    return g
157
+
158
def pageRanker(g,
               alpha,
               K,
               seed=None,
               initial_value=None,
               lazy=False,
               lazy_wt=.5,
               lazy_decay=True,
               self_loop_wt=default_self_loop_wt,
               node_wt_by_type=default_node_wt_by_type,
               edge_wt_by_type=default_edge_wt_by_type):
    """Run ``K`` score updates on ``g`` and return the result and trajectory.

    The graph is modified in place: node 'score' attributes are initialised
    from ``initial_value``, the weights are attached by ``wt_heuristic``, and
    ``update_score`` is then applied ``K`` times.

    Args:
        g: graph to rank (mutated).
        alpha: weight of the seed term in each update.
        K: number of update iterations.
        seed: mapping from node to seed value; defaults to the uniform
            value ``1 / len(g.nodes)`` for every node.
        initial_value: mapping from node to starting score; defaults to
            ``seed``.
        lazy: whether updates keep a share of the previous score.
        lazy_wt: base share of the previous score kept when ``lazy``.
        lazy_decay: when truthy, iteration ``k`` uses
            ``lazy_wt * (1 - k / (k + 3))`` instead of ``lazy_wt``.
        self_loop_wt: forwarded to ``wt_heuristic``.
        node_wt_by_type: forwarded to ``wt_heuristic``.
        edge_wt_by_type: forwarded to ``wt_heuristic``.

    Returns:
        A tuple ``(pr, df, g)``: ``pr`` is a numpy array of the scores after
        iteration ``K``, ``df`` is a pandas DataFrame with one row per
        iteration (0 through ``K``) and one column per node, and ``g`` is
        the mutated input graph.
    """
    # TODO: improve input verification for seed -- must be a dict keyed to
    # nodes with non-negative floating point values summing to 1.
    # `is None` (not `== None`) so array-like seeds are not compared
    # element-wise.
    if seed is None:
        N = len(g.nodes)
        seed = {n: 1.0 / N for n in g.nodes}

    # TODO: improve input verification for initial value -- same contract
    # as seed.
    if initial_value is None:
        initial_value = seed

    for n in g.nodes:
        g.nodes[n]['score'] = initial_value[n]

    g = wt_heuristic(g,
                     node_wt_by_type=node_wt_by_type,
                     edge_wt_by_type=edge_wt_by_type,
                     self_loop_wt=self_loop_wt)

    # Score mapping after each iteration, keyed by iteration number.
    x_dict = {0: initial_value}
    for k in range(0, K):
        g = update_score(g,
                         alpha,
                         seed,
                         lazy,
                         lazy_wt * (1 - int(lazy_decay) * k / (k + 3)))
        x_dict[k + 1] = nx.get_node_attributes(g, 'score')

    # Result in numpy array format.
    pr = np.array(list(x_dict[K].values()))

    # Trajectory in pandas dataframe format.
    df = pd.DataFrame(x_dict).T
    return pr, df, g
0 commit comments