Skip to content

Commit 6156f52

Browse files
committed
Updated storage gossip: create a new socket for every new IP and close it when it is no longer in use; added timeout values for connections.
1 parent 580e799 commit 6156f52

File tree

1 file changed

+71
-51
lines changed

1 file changed

+71
-51
lines changed

osd/storage_gossip.py

Lines changed: 71 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -10,84 +10,104 @@
1010

1111
def _notify_monitor(monitor, label, report, timeout=3):
    """Send ``report`` to one monitor and say whether it acknowledged.

    Args:
        monitor: Mapping with the monitor's ``"ip"`` and ``"port"`` entries.
        label: Human-readable monitor name used in log output
            (``"Primary"`` or ``"Backup"``).
        report: Message handed to ``_send_msg``.
        timeout: Seconds to wait for the connect and for the reply.

    Returns:
        True if the reply is a mapping whose ``"type"`` is ``"ACK"``,
        otherwise False (including on timeout or any connection error).
    """
    # One fresh socket per attempt: a socket whose connect() failed cannot
    # be reused, and the ``with`` block closes it on every exit path.
    with socket.socket() as soc:
        # The timeout is kept for the whole exchange (not reset to None
        # after connect) so that a monitor which accepts the connection but
        # never answers cannot block the caller forever.
        soc.settimeout(timeout)
        try:
            # connect() takes a single (host, port) tuple.
            soc.connect((monitor["ip"], monitor["port"]))
            print(f"Connecting {label} monitor...")
            _send_msg(soc, report)
            # NOTE(review): assumes _recv_msg returns a decoded dict - confirm.
            reply = _recv_msg(soc, 1024)
        except socket.timeout:
            print(f"Didn't receive data! [Timeout] {label} Monitor is Down")
            return False
        except Exception as exc:
            # Best-effort boundary: any other failure (refused connection,
            # malformed reply, ...) counts as "monitor not reachable".
            print(f"Could not reach {label} monitor: {exc!r}")
            return False

    return isinstance(reply, dict) and reply.get("type") == "ACK"


def recovery(node_ip, node_id):
    """Report a failed storage node to the monitors.

    The primary monitor is tried first; the backup monitor is contacted only
    if the primary does not acknowledge. A final message is printed when
    neither monitor acknowledges the report.

    Args:
        node_ip: IP address of the node that is considered down.
        node_id: Identifier of that node.

    Returns:
        None.
    """
    report = {"type": "FAIL", "ip": node_ip, "id": node_id}

    for role, label in (("primary", "Primary"), ("backup", "Backup")):
        if _notify_monitor(monitor_ip[role], label, report):
            return

    print("MAY GOD HELP US!! WE ARE DOOMED")
5769

5870

5971

6072
def _probe_node(node_ip, port, node_id, timeout=3):
    """Ping one storage node and say whether it answered ALIVE.

    Args:
        node_ip: IP address of the storage node.
        port: Port the storage node is expected to listen on.
        node_id: Node number, used only in log output.
        timeout: Seconds to wait for the connect and for the reply.

    Returns:
        True if the reply is a mapping whose ``"type"`` is ``"ALIVE"``,
        otherwise False (including on timeout or any connection error).
    """
    # A new socket per probe, closed by the ``with`` block on every path.
    with socket.socket() as soc:
        # Keep the timeout active for connect *and* receive so an
        # unresponsive node cannot stall the gossip loop.
        soc.settimeout(timeout)
        print("Socket successfully created for Gossip")
        try:
            soc.connect((node_ip, port))
            print(f"Connecting {node_ip} storage node number {node_id}")
            _send_msg(soc, {"type": "ALIVE"})
            # NOTE(review): assumes _recv_msg returns a decoded dict - confirm.
            reply = _recv_msg(soc, 1024)
        except socket.timeout:
            print("Didn't receive data! [Timeout]")
            return False
        except Exception as exc:
            # Best-effort boundary: treat any other failure as "node down"
            # rather than letting it kill the gossip loop.
            print(f"Storage node number {node_id} is unreachable: {exc!r}")
            return False

    return isinstance(reply, dict) and reply.get("type") == "ALIVE"


def gossip():
    """Periodically check storage nodes 1-4 and report the dead ones.

    Runs forever: every 10 seconds each node listed in ``storage_ip`` under
    keys 1 through 4 is pinged, and ``recovery`` is called for every node
    that does not answer ALIVE. This function never returns.
    """
    while True:
        # Wait for 10 sec between gossip rounds.
        time.sleep(10)

        for node_id in range(1, 5):
            node = storage_ip[node_id]
            node_ip = node["ip"]

            # recovery() is called outside the probe's error handling so a
            # failure inside recovery() cannot trigger a second recovery().
            if not _probe_node(node_ip, node["port"], node_id):
                recovery(node_ip, node_id)
91111

92112

93113

0 commit comments

Comments
 (0)