1111def recovery (node_ip , node_id ):
1212 #Will call monitor to state about the down node
1313 soc = socket .socket ()
14- soc .settimeout (3 )
14+ soc .settimeout (5 )
15+ print ("Socket successfully created for Recovery: Primary" )
1516
1617 monitor_1 = monitor_ip ["primary" ]
1718
1819 try :
19- soc .connect (monitor_1 ["ip" ], monitor_1 ["port" ])
20+ soc .connect (( monitor_1 ["ip" ], monitor_1 ["port" ]) )
2021 soc .timeout (None )
2122
2223 print (f"Connecting Primary monitor..." )
@@ -32,20 +33,22 @@ def recovery(node_ip, node_id):
3233 pass
3334
3435 except soc .timeout : # fail after 1 second of no activity
35- print ("Didn't receive data! [Timeout] Primary Monitor is Down " )
36+ print ("Didn't receive data! [Timeout] Primary Monitor is failed in between " )
3637
3738 except :
38- print ("Didn't receive data ! [Timeout] Primary Monitor is Down" )
39+ print ("Didn't Connect ! [Timeout] Primary Monitor is Down" )
3940
4041 soc .close ()
4142
4243 soc = socket .socket ()
43- soc .settimeout (3 )
44+ soc .settimeout (5 )
45+ print ("Socket successfully created for Recovery: Backup" )
46+
4447
4548 monitor_2 = monitor_ip ["backup" ]
4649
4750 try :
48- soc .connect (monitor_2 ["ip" ], monitor_2 ["port" ])
51+ soc .connect (( monitor_2 ["ip" ], monitor_2 ["port" ]) )
4952 soc .settimeout (None )
5053 print (f"Connecting Backup monitor..." )
5154
@@ -60,10 +63,11 @@ def recovery(node_ip, node_id):
6063 pass
6164
6265 except soc .timeout : # fail after 1 second of no activity
63- print ("Didn't receive data! [Timeout] Primary Monitor is Down " )
66+ print ("Didn't receive data! [Timeout] Backup Monitor failed in between " )
6467
6568 except :
66- print ("MAY GOD HELP US!! WE ARE DOOMED" )
69+ print ("MAY GOD HELP US!! WE ARE DOOMED\n \n " )
70+
6771
6872 soc .close ()
6973
@@ -77,37 +81,37 @@ def gossip():
7781
7882 i = 0
7983 for i in range (4 ):
80- node_ip = storage_ip [i + 1 ]["ip" ]
81- port = storage_ip [i + 1 ]["port" ]
84+ if i + 1 != STORAGE_ID :
85+ node_ip = storage_ip [i + 1 ]["ip" ]
86+ port = storage_ip [i + 1 ]["port" ]
8287
83- soc .settimeout (3 )
84- soc = socket .socket ()
85- soc .settimeout (None )
86- print ("Socket successfully created for Gossip" )
87-
88- try :
89- soc .connect ((node_ip , port ))
90- soc .timeout (None )
91-
92- print (f"Connecting { node_ip } storage node number { i + 1 } " )
93-
94- msg = {"type" : "ALIVE" }
95- _send_msg (soc , msg )
96-
97- try :
98- rec = _recv_msg (c , 1024 )
99- if rec ["type" ] != "ALIVE" :
88+ soc = socket .socket ()
89+ soc .settimeout (3 )
90+ print ("Socket successfully created for Gossip" )
91+
92+ try :
93+ soc .connect ((node_ip , port ))
94+ soc .settimeout (None )
95+
96+ print (f"Connecting { node_ip } storage node number { i + 1 } " )
97+
98+ msg = {"type" : "ALIVE" }
99+ _send_msg (soc , msg )
100+
101+ try :
102+ rec = _recv_msg (c , 1024 )
103+ if rec ["type" ] != "ALIVE" :
104+ recovery (node_ip , i + 1 )
105+
106+ except soc .timeout : # fail after 1 second of no activity
107+ print (f"Didn't receive data to Storage { i + 1 } ip { node_ip } ! [Timeout] " )
100108 recovery (node_ip , i + 1 )
101-
102- except socket . timeout : # fail after 1 second of no activity
103- print ("Didn't receive data ! [Timeout]" )
109+
110+ except :
111+ print (f "Didn't Connect to Storage { i + 1 } ip { node_ip } ! [Timeout]" )
104112 recovery (node_ip , i + 1 )
105-
106- except :
107- print ("Didn't receive data! [Timeout]" )
108- recovery (node_ip , i + 1 )
109113
110- soc .close ()
114+ soc .close ()
111115
112116
113117
@@ -119,27 +123,28 @@ def heartbeat_protocol():
119123 print ("Socket successfully created for Heartbeat" )
120124 port = storage_ip [STORAGE_ID ]["port" ]
121125 s .bind (('' , port ))
122- print ("socket binded to %s" % (port ))
126+ print ("Socket binded to %s" % (port ))
123127
124128 # put the socket into listening mode
125129 s .listen (5 )
126- print ("socket is listening" )
130+ print ("Socket is listening" )
127131
128132 while True :
129133 # Establish connection with client.
130134 c , addr = s .accept ()
131135 print ('Got connection from' , addr )
132-
133- # send a thank you message to the client.
134- msg = _recv_msg (c , 1024 )
135- print (msg )
136136
137- if msg ["type" ] == "ALIVE" :
138- res = {"type" : "ACK" }
139- _send_msg (c , res )
137+ try :
138+ msg = _recv_msg (c , 1024 )
139+ if msg ["type" ] != "ALIVE" :
140+ res = {"type" : "ACK" }
141+ _send_msg (c , res )
140142
141- c .close ()
143+ except c .timeout : # fail after 1 second of no activity
144+ print (f"Didn't receive data from ip { addr } ! [Timeout] " )
145+ recovery (addr , None )
142146
147+ c .close ()
143148
144149
145150if __name__ == "__main__" :
0 commit comments