@@ -98,118 +98,129 @@ class MaigretGraph:
98
98
def __init__(self, graph):
    """Keep a reference to the graph object that this wrapper populates.

    The object is stored as ``self.G``; the other methods of this class
    call ``add_node`` and ``add_edge`` on it.
    """
    self.G = graph
100
100
101
def add_node(self, key, value, color=None):
    """Add a node named ``'<key>: <value>'`` to the graph and return its name.

    The node's display attributes come from one of the presets stored on
    ``self``: ``username_params`` when *key* is in ``SUPPORTED_IDS``,
    ``site_params`` when *value* is a string starting with ``'http'``, and
    ``other_params`` otherwise.

    Args:
        key: Kind of the node (first half of the node name).
        value: Value of the node (second half of the node name). Non-string
            values are accepted and simply never match the URL preset.
        color: Optional colour that overrides the preset's ``color`` entry;
            ignored when falsy.

    Returns:
        The node name that was added to the graph.
    """
    node_name = f'{key}: {value}'

    if key in SUPPORTED_IDS:
        preset = self.username_params
    elif isinstance(value, str) and value.startswith('http'):
        # The isinstance guard keeps non-string values from raising
        # AttributeError on ``.startswith``.
        preset = self.site_params
    else:
        preset = self.other_params

    # Copy the chosen preset so the per-node keys set below never end up in
    # the dict stored on ``self``.
    params = dict(preset)
    params['title'] = node_name
    if color:
        params['color'] = color

    self.G.add_node(node_name, **params)
    return node_name
117
116
118
117
def link(self, node1_name, node2_name, weight=2):
    """Connect two existing nodes with an edge.

    Args:
        node1_name: Name of the first node, as returned by ``add_node``.
        node2_name: Name of the second node, as returned by ``add_node``.
        weight: Value stored in the edge's ``weight`` attribute. Defaults
            to 2, the value that was previously hard-coded.
    """
    self.G.add_edge(node1_name, node2_name, weight=weight)
120
119
121
120
122
121
def save_graph_report(filename: str, username_results: list, db: MaigretDatabase):
    """Render the search results as an interactive graph written to *filename*.

    A node is created for every searched username, for every claimed account
    and its site, and for the ids extracted from those accounts. Node names
    longer than 100 characters and site nodes with at most one connection are
    pruned before the graph is rendered with pyvis.

    Args:
        filename: Output path passed to ``Network.show``.
        username_results: Iterable of ``(username, id_type, results)``
            tuples, where ``results`` maps a site name to its result
            dictionary (entries may be empty).
        db: Database object whose ``extract_ids_from_url`` is used to find
            further ids inside extracted values.
    """
    # Imported lazily to speed up the launch of Maigret.
    import networkx as nx

    G = nx.Graph()
    graph = MaigretGraph(G)

    base_site_nodes = {}
    site_account_nodes = {}
    processed_values = {}  # "<key>:<normalized value>" -> node name, avoids duplicates

    def ids_from_url(url):
        """Return the ids found in *url* as a ``{key: value}`` dict.

        ``db.extract_ids_from_url`` yields the mapping the other way round,
        so it is inverted here. Non-string input yields an empty dict.
        """
        if not isinstance(url, str):
            return {}
        return {
            id_key: id_value
            for id_value, id_key in db.extract_ids_from_url(url).items()
        }

    def process_ids(parent_node, ids, site_name):
        """Add a node for each entry of *ids* and link it to *parent_node*.

        Values that contain further ids are expanded recursively. *site_name*
        is used as the node value for list-typed entries.
        """
        for k, v in ids.items():
            # Counters, flags, timestamps and the image field are not graphed.
            # ``==`` is deliberate: ``k in 'image'`` would be a substring test
            # that also drops keys such as 'age'.
            if (
                k.endswith(('_count', '_at'))
                or k.startswith('is_')
                or k == 'image'
            ):
                continue

            # Normalize value if string
            norm_v = v.lower() if isinstance(v, str) else v
            value_key = f"{k}:{norm_v}"

            if value_key in processed_values:
                ids_data_name = processed_values[value_key]
            else:
                v_data = v
                if isinstance(v, str) and v.startswith('['):
                    try:
                        v_data = ast.literal_eval(v)
                    except (
                        ValueError,
                        TypeError,
                        SyntaxError,
                        MemoryError,
                        RecursionError,
                    ) as e:
                        # Looks like a list but cannot be parsed: skip the entry.
                        logging.error(e)
                        continue

                if isinstance(v_data, list):
                    list_node_name = graph.add_node(k, site_name)
                    processed_values[value_key] = list_node_name
                    for vv in v_data:
                        # NOTE(review): the element is passed as the node *key*
                        # and the site as the value; kept as in the original.
                        data_node_name = graph.add_node(vv, site_name)
                        graph.link(list_node_name, data_node_name)

                        add_ids = ids_from_url(vv)
                        if add_ids:
                            process_ids(data_node_name, add_ids, site_name)
                    ids_data_name = list_node_name
                else:
                    # str() leaves the node name unchanged but keeps non-string
                    # values from reaching ``str.startswith`` in add_node.
                    ids_data_name = graph.add_node(k, str(norm_v))
                    processed_values[value_key] = ids_data_name

                    if 'username' in k or k in SUPPORTED_IDS:
                        new_username_key = f"username:{norm_v}"
                        # Link only when the username node is new; for
                        # k == 'username' the key was cached just above, which
                        # prevents a self-loop.
                        if new_username_key not in processed_values:
                            new_username_node_name = graph.add_node(
                                'username', str(norm_v)
                            )
                            processed_values[new_username_key] = new_username_node_name
                            graph.link(ids_data_name, new_username_node_name)

                    add_ids = ids_from_url(v)
                    if add_ids:
                        process_ids(ids_data_name, add_ids, site_name)

            graph.link(parent_node, ids_data_name)

    for username, id_type, results in username_results:
        # Usernames are graphed in their lower-cased form.
        norm_username = username.lower()
        username_node_name = graph.add_node(id_type, norm_username)

        for website_name, dictionary in results.items():
            if not dictionary or dictionary.get("is_similar"):
                continue

            status = dictionary.get("status")
            if not status or status.status != MaigretCheckStatus.CLAIMED:
                continue

            # base site node (one per site, shared by all its accounts)
            site_base_url = website_name
            if site_base_url not in base_site_nodes:
                base_site_nodes[site_base_url] = graph.add_node(
                    'site', site_base_url, color='#28a745'  # Green color
                )
            site_base_node_name = base_site_nodes[site_base_url]

            # account node
            account_url = dictionary.get(
                'url_user', f'{site_base_url}/{norm_username}'
            )
            account_node_id = f"{site_base_url}: {account_url}"
            if account_node_id not in site_account_nodes:
                site_account_nodes[account_node_id] = graph.add_node(
                    'account', account_url
                )
            account_node_name = site_account_nodes[account_node_id]

            # link username -> account -> site
            graph.link(username_node_name, account_node_name)
            graph.link(account_node_name, site_base_node_name)

            if status.ids_data:
                process_ids(account_node_name, status.ids_data, site_base_url)

    # Remove overly long nodes
    nodes_to_remove = [node for node in G.nodes if len(str(node)) > 100]
    G.remove_nodes_from(nodes_to_remove)

    # Remove site nodes with only one connection
    single_degree_sites = [
        n for n, deg in G.degree() if n.startswith("site:") and deg <= 1
    ]
    G.remove_nodes_from(single_degree_sites)

    # Generate interactive visualization (pyvis is also imported lazily).
    from pyvis.network import Network

    nt = Network(notebook=True, height="750px", width="100%")
    nt.from_nx(G)
    nt.show(filename)
0 commit comments