Skip to content

Commit ae82fe9

Browse files
committed
Storage nodes work ... still need to change the monitor to wait for the package for a while
1 parent 6156f52 commit ae82fe9

File tree

4 files changed

+70
-61
lines changed

4 files changed

+70
-61
lines changed

common/info.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,15 +36,15 @@
3636
"""
3737

3838
# Update all the storage node IPs every time
39-
storage_1 = {"ip":"18.223.120.174", "port":9991}
40-
storage_2 = {"ip":"18.223.120.174", "port":9992}
39+
storage_1 = {"ip":"18.224.71.170", "port":9991}
40+
storage_2 = {"ip":"18.221.219.50", "port":9992}
4141
storage_3 = {"ip":"18.223.120.174", "port":9993}
4242
storage_4 = {"ip":"18.223.120.174", "port":9994}
4343

4444
storage_ip = {1:storage_1,2:storage_2,3:storage_3,4:storage_4}
4545

4646

4747
# Update all the monitor node IPs
48-
monitor_1 = {"ip":"3.16.150.43", "port":12345}
48+
monitor_1 = {"ip":"18.188.179.6", "port":12345}
4949
monitor_2 = {"ip":"18.219.161.117", "port":12346}
5050
monitor_ip = {"primary": monitor_1, "backup": monitor_2}

common/transfer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ def _send_msg(socket, msg):
1010

1111
def _recv_msg(socket, size):
1212
msg = socket.recv(1024)
13+
1314
r_msg = pickle.loads(msg)
1415
return r_msg

monitor/monitor_gossip.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,23 @@ def heartbeat_protocol(soc):
3737
# Establish connection with client.
3838
c, addr = s.accept()
3939
print ('Got connection from', addr )
40-
40+
4141
# receive the client's message and reply with an ACK.
42-
msg = _recv_msg(c, 1024)
43-
print(msg)
42+
try:
43+
msg = _recv_msg(c, 1024)
44+
print(msg)
45+
46+
if msg["type"] == "ALIVE":
47+
res = {"type": "ACK"}
48+
_send_msg(c, res)
4449

45-
if msg["type"] == "ALIVE":
46-
res = {"type": "ACK"}
47-
_send_msg(c, res)
50+
elif msg["type"] == "FAIL":
51+
res = {"type": "ACK"}
52+
_send_msg(c, res)
53+
gossip(c, msg)
4854

49-
elif msg["type"] == "FAIL":
50-
res = {"type": "ACK"}
51-
_send_msg(c, res)
52-
gossip(c, msg)
55+
except c.timeout: # fail after 1 second of no activity
56+
print(f"Didn't receive data! [Timeout] {addr}")
5357

5458
c.close()
5559

@@ -60,16 +64,15 @@ def heartbeat_protocol(soc):
6064
print ("Socket successfully created")
6165

6266
# reserve a port on your computer in our
63-
# case it is 12345 but it can be anything
64-
port = monitor_ip["backup"]["port"]
67+
port = monitor_ip["primary"]["port"]
6568

6669
# Next bind to the port
6770
# we have not typed any ip in the ip field
6871
# instead we have inputted an empty string
6972
# this makes the server listen to requests
7073
# coming from other computers on the network
7174
s.bind(('', port))
72-
print ("socket binded to %s" %(port))
75+
print ("Monitor socket binded to %s" %(port))
7376

7477
# put the socket into listening mode
7578
s.listen(5)

osd/storage_gossip.py

Lines changed: 50 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@
1111
def recovery(node_ip, node_id):
1212
#Will call monitor to state about the down node
1313
soc = socket.socket()
14-
soc.settimeout(3)
14+
soc.settimeout(5)
15+
print ("Socket successfully created for Recovery: Primary")
1516

1617
monitor_1 = monitor_ip["primary"]
1718

1819
try :
19-
soc.connect(monitor_1["ip"], monitor_1["port"])
20+
soc.connect((monitor_1["ip"], monitor_1["port"]))
2021
soc.timeout(None)
2122

2223
print(f"Connecting Primary monitor...")
@@ -32,20 +33,22 @@ def recovery(node_ip, node_id):
3233
pass
3334

3435
except soc.timeout: # fail after 1 second of no activity
35-
print("Didn't receive data! [Timeout] Primary Monitor is Down")
36+
print("Didn't receive data! [Timeout] Primary Monitor is failed in between")
3637

3738
except :
38-
print("Didn't receive data! [Timeout] Primary Monitor is Down")
39+
print("Didn't Connect! [Timeout] Primary Monitor is Down")
3940

4041
soc.close()
4142

4243
soc = socket.socket()
43-
soc.settimeout(3)
44+
soc.settimeout(5)
45+
print ("Socket successfully created for Recovery: Backup")
46+
4447

4548
monitor_2 = monitor_ip["backup"]
4649

4750
try :
48-
soc.connect(monitor_2["ip"], monitor_2["port"])
51+
soc.connect((monitor_2["ip"], monitor_2["port"]))
4952
soc.settimeout(None)
5053
print(f"Connecting Backup monitor...")
5154

@@ -60,10 +63,11 @@ def recovery(node_ip, node_id):
6063
pass
6164

6265
except soc.timeout: # fail after 1 second of no activity
63-
print("Didn't receive data! [Timeout] Primary Monitor is Down")
66+
print("Didn't receive data! [Timeout] Backup Monitor failed in between")
6467

6568
except:
66-
print("MAY GOD HELP US!! WE ARE DOOMED")
69+
print("MAY GOD HELP US!! WE ARE DOOMED\n\n")
70+
6771

6872
soc.close()
6973

@@ -77,37 +81,37 @@ def gossip():
7781

7882
i=0
7983
for i in range(4):
80-
node_ip = storage_ip[i+1]["ip"]
81-
port = storage_ip[i+1]["port"]
84+
if i+1 != STORAGE_ID:
85+
node_ip = storage_ip[i+1]["ip"]
86+
port = storage_ip[i+1]["port"]
8287

83-
soc.settimeout(3)
84-
soc = socket.socket()
85-
soc.settimeout(None)
86-
print ("Socket successfully created for Gossip")
87-
88-
try :
89-
soc.connect((node_ip, port))
90-
soc.timeout(None)
91-
92-
print(f"Connecting {node_ip} storage node number {i+1}")
93-
94-
msg = {"type": "ALIVE"}
95-
_send_msg(soc, msg)
96-
97-
try:
98-
rec = _recv_msg(c, 1024)
99-
if rec["type"] != "ALIVE":
88+
soc = socket.socket()
89+
soc.settimeout(3)
90+
print ("Socket successfully created for Gossip")
91+
92+
try :
93+
soc.connect((node_ip, port))
94+
soc.settimeout(None)
95+
96+
print(f"Connecting {node_ip} storage node number {i+1}")
97+
98+
msg = {"type": "ALIVE"}
99+
_send_msg(soc, msg)
100+
101+
try:
102+
rec = _recv_msg(c, 1024)
103+
if rec["type"] != "ALIVE":
104+
recovery(node_ip, i+1)
105+
106+
except soc.timeout: # fail after 1 second of no activity
107+
print(f"Didn't receive data to Storage {i+1} ip {node_ip}! [Timeout] ")
100108
recovery(node_ip, i+1)
101-
102-
except socket.timeout: # fail after 1 second of no activity
103-
print("Didn't receive data! [Timeout]")
109+
110+
except :
111+
print(f"Didn't Connect to Storage {i+1} ip {node_ip}! [Timeout]")
104112
recovery(node_ip, i+1)
105-
106-
except :
107-
print("Didn't receive data! [Timeout]")
108-
recovery(node_ip, i+1)
109113

110-
soc.close()
114+
soc.close()
111115

112116

113117

@@ -119,27 +123,28 @@ def heartbeat_protocol():
119123
print ("Socket successfully created for Heartbeat")
120124
port = storage_ip[STORAGE_ID]["port"]
121125
s.bind(('', port))
122-
print ("socket binded to %s" %(port))
126+
print ("Socket binded to %s" %(port))
123127

124128
# put the socket into listening mode
125129
s.listen(5)
126-
print ("socket is listening")
130+
print ("Socket is listening")
127131

128132
while True:
129133
# Establish connection with client.
130134
c, addr = s.accept()
131135
print ('Got connection from', addr )
132-
133-
# send a thank you message to the client.
134-
msg = _recv_msg(c, 1024)
135-
print(msg)
136136

137-
if msg["type"] == "ALIVE":
138-
res = {"type": "ACK"}
139-
_send_msg(c, res)
137+
try:
138+
msg = _recv_msg(c, 1024)
139+
if msg["type"] != "ALIVE":
140+
res = {"type": "ACK"}
141+
_send_msg(c, res)
140142

141-
c.close()
143+
except c.timeout: # fail after 1 second of no activity
144+
print(f"Didn't receive data from ip {addr}! [Timeout] ")
145+
recovery(addr, None)
142146

147+
c.close()
143148

144149

145150
if __name__ == "__main__":

0 commit comments

Comments
 (0)