Skip to content
This repository was archived by the owner on Apr 30, 2020. It is now read-only.

Commit 279e5d9

Browse files
committed
copy page_ranker to infra
1 parent 3ae8df5 commit 279e5d9

File tree

1 file changed

+208
-0
lines changed

1 file changed

+208
-0
lines changed

infra/page_ranker.py

+208
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
#!/usr/bin/env python3
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Thu Mar 14 20:43:21 2019
5+
6+
@author: Zargham
7+
"""
8+
9+
import networkx as nx
10+
import pandas as pd
11+
import numpy as np
12+
13+
#defaults
14+
# Raw (un-normalised) weight given to the implicit self-loop of every node.
default_self_loop_wt = 0.001
15+
16+
def update_score(g, alpha, seed, lazy=False, lazy_wt=.5):
    """Run one synchronous score update over every node of ``g``.

    Each node's new score mixes ``alpha * seed[node]`` with ``(1 - alpha)``
    times the score flowing in from the node itself (its self-loop) and from
    every neighbour, where each contribution is the relevant raw weight
    divided by the *contributing* node's ``total_wt``.

    The node attributes ``score``, ``self_wt``, ``total_wt``, ``in_nbr`` and
    ``out_nbr`` and the edge attributes ``in_weight`` / ``out_weight`` must
    already be present on ``g``.

    Args:
        g: graph whose edges are addressed as ``(u, v, key)`` triples.
        alpha: weight placed on the seed term; ``1 - alpha`` goes to the
            propagated term.
        seed: mapping indexed by node giving the seed value of each node.
        lazy: when falsy the holding weight is forced to zero.
        lazy_wt: fraction of the previous score retained when ``lazy`` is
            truthy.

    Returns:
        The same graph ``g`` with every node's ``score`` overwritten.
    """
    # float(False) == 0.0, so a non-lazy walk keeps none of the old score.
    hold = lazy_wt * float(lazy)

    # Snapshot taken before the sweep: all nodes are updated from the same
    # previous scores, regardless of iteration order.
    previous = nx.get_node_attributes(g, 'score')

    for node in g.nodes:
        attrs = g.nodes[node]
        loop_share = attrs['self_wt'] / attrs['total_wt']
        acc = (1 - alpha) * loop_share * previous[node] + alpha * seed[node]

        # Neighbours reached by edges node -> peer.
        for peer in attrs['out_nbr']:
            for key in range(edge_count(g, node, peer)):
                share = (g.edges[(node, peer, key)]['out_weight']
                         / g.nodes[peer]['total_wt'])
                acc = acc + (1 - alpha) * share * previous[peer]

        # Neighbours reached by edges peer -> node.
        for peer in attrs['in_nbr']:
            for key in range(edge_count(g, peer, node)):
                share = (g.edges[(peer, node, key)]['in_weight']
                         / g.nodes[peer]['total_wt'])
                acc = acc + (1 - alpha) * share * previous[peer]

        g.nodes[node]['score'] = hold * previous[node] + (1 - hold) * acc

    return g
45+
46+
#helper function
47+
def edge_count(g, src, dst):
    """Count the parallel edges ``src -> dst`` keyed ``0, 1, 2, ...``.

    Probes ``g.edges[(src, dst, i)]`` for ``i = 0, 1, ...`` and stops at the
    first key that is missing, so only a gap-free run of integer keys
    starting at 0 is counted.

    Args:
        g: object whose ``edges`` attribute can be indexed with a
            ``(src, dst, key)`` triple and raises ``KeyError`` for an
            absent edge.
        src: source node.
        dst: destination node.

    Returns:
        The number of consecutive keys found (0 when there is no edge).
    """
    i = 0
    while True:
        try:
            g.edges[(src, dst, i)]
        except KeyError:
            # Only a missing edge ends the count; any other error is a real
            # problem and must propagate instead of being swallowed.
            return i
        i += 1
57+
58+
# Edge-type multipliers, stored as (to_weight, from_weight) pairs.
# wt_heuristic scales element 0 by the destination node's weight to get the
# edge's 'in_weight', and element 1 by the source node's weight to get its
# 'out_weight'.
default_edge_wt_by_type = {
    'github/authors': (0.5, 1),
    'github/hasParent': (1, 0.25),  # 1/4
    'git/hasParent': (1, 0.25),  # 1/4
    'github/mentionsAuthor': (1, 0.03125),  # 1/32
    'github/mergedAs': (0.5, 1),
    'github/references': (1, 0.0625),  # 1/16
    'github/reactsHeart': (2, 0.03125),  # 1/32
    'github/reactsHooray': (4, 0.03125),  # 1/32
    # appears to be missing from current implementation
    'github/reactsRocket': (1, 0),
    'github/reactsThumbsUp': (1, 0.03125),  # 1/32
}
71+
72+
# Node-type weights; wt_heuristic multiplies them into the per-edge
# 'in_weight' (destination node's weight) and 'out_weight' (source node's).
default_node_wt_by_type = {
    'github/issue': 2.0,
    'github/repo': 4.0,
    'github/comment': 1.0,
    'git/commit': 2.0,
    'github/user': 1.0,
    'github/bot': 1.0,
    'github/review': 1.0,
    'github/pull': 4.0,
}
82+
83+
84+
def wt_heuristic(g,
                 node_wt_by_type=default_node_wt_by_type,
                 edge_wt_by_type=default_edge_wt_by_type,
                 self_loop_wt=default_self_loop_wt):
    """Annotate ``g`` in place with the weights and neighbourhoods used for ranking.

    Three passes are made over the graph:

    1. Every edge gets ``in_weight`` (first element of its type's weight
       pair times the destination node's type weight) and ``out_weight``
       (second element times the source node's type weight).
    2. Every node gets ``all_nbr`` (set of ``nx.all_neighbors``), ``in_nbr``
       (neighbours ``nb`` with an edge ``(nb, n, 0)``) and ``out_nbr``
       (neighbours ``nb`` with an edge ``(n, nb, 0)``).
    3. Every node gets ``self_wt`` (``self_loop_wt``) and ``total_wt``: the
       self weight plus the ``in_weight`` of each of its outgoing edges and
       the ``out_weight`` of each of its incoming edges.

    Args:
        g: graph whose edges are addressed as ``(u, v, key)`` triples; every
            node and edge must carry a ``'type'`` attribute.
        node_wt_by_type: mapping from node type to weight.
        edge_wt_by_type: mapping from edge type to a weight pair (indexed
            with ``[0]`` and ``[1]``).
        self_loop_wt: raw self-loop weight assigned to every node.

    Returns:
        The same graph ``g``, mutated.

    Raises:
        KeyError: if a node or edge type is missing from the weight tables.
    """
    # Pass 1: directional raw weights on every edge.
    for e in g.edges:
        e_wts = edge_wt_by_type[g.edges[e]['type']]
        src_wt = node_wt_by_type[g.nodes[e[0]]['type']]
        dst_wt = node_wt_by_type[g.nodes[e[1]]['type']]

        g.edges[e]['in_weight'] = e_wts[0] * dst_wt
        g.edges[e]['out_weight'] = e_wts[1] * src_wt

    # Pass 2: neighbourhoods. The presence of key 0 marks "at least one edge
    # in this direction"; has_edge answers that without using exceptions as
    # control flow (and without hiding unrelated errors).
    for n in g.nodes:
        all_nbr = set(nx.all_neighbors(g, n))
        g.nodes[n]['all_nbr'] = all_nbr
        g.nodes[n]['in_nbr'] = {
            nb for nb in all_nbr if g.has_edge(nb, n, key=0)
        }
        g.nodes[n]['out_nbr'] = {
            nb for nb in all_nbr if g.has_edge(n, nb, key=0)
        }

    # Pass 3: per-node normalisation constant.
    for n in g.nodes:
        self_wt = self_loop_wt
        g.nodes[n]['self_wt'] = self_wt
        total_wt = self_wt

        # Outgoing edges n -> nb contribute their 'in_weight'.
        for nb in g.nodes[n]['out_nbr']:
            for e3 in range(edge_count(g, n, nb)):
                total_wt = total_wt + g.edges[(n, nb, e3)]['in_weight']

        # Incoming edges nb -> n contribute their 'out_weight'.
        for nb in g.nodes[n]['in_nbr']:
            for e3 in range(edge_count(g, nb, n)):
                total_wt = total_wt + g.edges[(nb, n, e3)]['out_weight']

        g.nodes[n]['total_wt'] = total_wt

    return g
157+
158+
def pageRanker(g,
               alpha,
               K,
               seed=None,
               initial_value=None,
               lazy=False,
               lazy_wt=.5,
               lazy_decay=True,
               self_loop_wt=default_self_loop_wt,
               node_wt_by_type=default_node_wt_by_type,
               edge_wt_by_type=default_edge_wt_by_type):
    """Run ``K`` score-update iterations on ``g`` and return the trajectory.

    The graph is mutated: node ``score`` attributes are initialised from
    ``initial_value``, ``wt_heuristic`` attaches weights and neighbourhoods,
    and ``update_score`` is then applied ``K`` times.

    Args:
        g: graph to rank (modified in place).
        alpha: weight on the seed term in each update.
        K: number of iterations to run.
        seed: mapping from node to seed value; defaults to the uniform
            value ``1 / N`` on each of the ``N`` nodes. Not validated.
        initial_value: mapping from node to starting score; defaults to
            ``seed``. Not validated.
        lazy: enable the lazy (score-holding) update.
        lazy_wt: base holding weight for the lazy update.
        lazy_decay: when truthy, the holding weight used at iteration ``k``
            is ``lazy_wt * (1 - k / (k + 3))``; otherwise it stays
            ``lazy_wt``.
        self_loop_wt: forwarded to ``wt_heuristic``.
        node_wt_by_type: forwarded to ``wt_heuristic``.
        edge_wt_by_type: forwarded to ``wt_heuristic``.

    Returns:
        A tuple ``(pr, df, g)``: ``pr`` is a numpy array of the scores after
        iteration ``K`` (for ``K == 0`` these are the values of
        ``initial_value`` in its own order), ``df`` is a DataFrame with one
        row per iteration ``0..K``, and ``g`` is the mutated graph.
    """
    # Identity test: `== None` is evaluated element-wise by array-like
    # inputs (numpy arrays, pandas Series), which makes the `if` ambiguous.
    if seed is None:
        N = len(g.nodes)
        seed = {n: 1.0 / N for n in g.nodes}

    if initial_value is None:
        initial_value = seed

    for n in g.nodes:
        g.nodes[n]['score'] = initial_value[n]

    g = wt_heuristic(g,
                     node_wt_by_type=node_wt_by_type,
                     edge_wt_by_type=edge_wt_by_type,
                     self_loop_wt=self_loop_wt)

    # Scores recorded per iteration; entry 0 is the starting point.
    x_dict = {0: initial_value}
    for k in range(0, K):
        g = update_score(g,
                         alpha,
                         seed,
                         lazy,
                         lazy_wt * (1 - int(lazy_decay) * k / (k + 3)))
        x_dict[k + 1] = nx.get_node_attributes(g, 'score')

    # Result in numpy array format.
    pr = np.array(list(x_dict[K].values()))

    # Trajectory in pandas DataFrame format (rows are iterations).
    df = pd.DataFrame(x_dict).T
    return pr, df, g

0 commit comments

Comments
 (0)