3
3
import threading
4
4
import time
5
5
import os
6
+
7
+ from storage_replication import replicate_pg
8
+
9
+ sys .path .insert (1 , '../utils/' )
6
10
from transfer import _send_msg , _recv_msg , _wait_recv_msg
7
- from info import MONITOR_IPs , OSD_IPs , HEARTBEAT_PORT , num_objects_per_file , max_num_objects_per_pg , MSG_SIZE , HEADERSIZE
11
+ from info import mds_ip , monitor_ip , storage_ip , num_objects_per_file , max_num_objects_per_pg , MSG_SIZE , HEADERSIZE
12
+
13
+
8
14
9
- STORAGE_ID = 1
15
+ STORAGE_ID = "osd_id1"
10
16
11
17
12
- def report_monitor (node_ip , node_id ):
18
+ def report_monitor (node_ip , osd_id ):
13
19
flag = 0
14
20
#Will call monitor to state about the down node
15
21
soc = socket .socket ()
16
22
soc .settimeout (5 )
17
23
print ("Socket successfully created for Recovery: Primary" )
18
24
19
- monitor_1 = MONITOR_IPs ["primary" ]
25
+ monitor_1 = monitor_ip ["primary" ]
20
26
21
27
try :
22
28
soc .connect ((monitor_1 ["ip" ], monitor_1 ["port" ]))
23
29
# soc.timeout(None)
24
30
25
31
print (f"Connecting Primary monitor..." )
26
32
27
- res = {"type" : "FAIL" , "ip" : node_ip , "id " : node_id }
33
+ res = {"type" : "FAIL" , "ip" : node_ip , "osd_id " : osd_id }
28
34
_send_msg (soc , res )
29
35
30
36
msg = _wait_recv_msg (soc , MSG_SIZE )
@@ -47,14 +53,14 @@ def report_monitor(node_ip, node_id):
47
53
print ("Socket successfully created for Recovery: Backup" )
48
54
49
55
50
- monitor_2 = MONITOR_IPs ["backup" ]
56
+ monitor_2 = monitor_ip ["backup" ]
51
57
52
58
try :
53
59
soc .connect ((monitor_2 ["ip" ], monitor_2 ["port" ]))
54
60
soc .settimeout (None )
55
61
print (f"Connecting Backup monitor..." )
56
62
57
- res = {"type" : "FAIL" , "ip" : node_ip , "id " : node_id }
63
+ res = {"type" : "FAIL" , "ip" : node_ip , "osd_id " : osd_id }
58
64
_send_msg (soc , res )
59
65
60
66
msg = _wait_recv_msg (soc , MSG_SIZE )
@@ -79,35 +85,36 @@ def recv_gossip():
79
85
80
86
i = 0
81
87
for i in range (4 ):
82
- if i + 1 != STORAGE_ID :
83
- node_ip = OSD_IPs [i + 1 ]["ip" ]
84
- port = OSD_IPs [i + 1 ]["port" ]
88
+ curr_osd = "osd_id" + str (i + 1 )
89
+ if curr_osd != STORAGE_ID :
90
+ node_ip = storage_ip [curr_osd ]["ip" ]
91
+ port = storage_ip [curr_osd ]["port" ]
85
92
86
93
soc = socket .socket ()
87
94
soc .settimeout (5 )
88
- print (f"\n \n Socket successfully created for Gossip with osd { i + 1 } " )
95
+ print (f"\n \n Socket successfully created for Gossip with osd { curr_osd } " )
89
96
90
97
try :
91
98
soc .connect ((node_ip , port ))
92
99
soc .settimeout (None )
93
100
94
- print (f"Connecting { node_ip } storage node number { i + 1 } port { port } " )
101
+ print (f"Connecting { node_ip } storage node number { curr_osd } port { port } " )
95
102
96
103
msg = {"type" : "ALIVE" }
97
104
_send_msg (soc , msg )
98
105
99
106
rec = _wait_recv_msg (soc , MSG_SIZE )
100
107
print (msg )
101
108
if rec == None :
102
- print (f"Didn't receive data to Storage { i + 1 } ip { node_ip } ! [Timeout] " )
103
- report_monitor (node_ip , i + 1 )
109
+ print (f"Didn't receive data to Storage { curr_osd } ip { node_ip } ! [Timeout] " )
110
+ report_monitor (node_ip , curr_osd )
104
111
105
112
elif rec ["type" ] != "ACK" :
106
- report_monitor (node_ip , i + 1 )
113
+ report_monitor (node_ip , curr_osd )
107
114
108
115
except :
109
- print (f"Didn't Connect to Storage { i + 1 } ip { node_ip } ! [Timeout]" )
110
- report_monitor (node_ip , i + 1 )
116
+ print (f"Didn't Connect to Storage { curr_osd } ip { node_ip } ! [Timeout]" )
117
+ report_monitor (node_ip , curr_osd )
111
118
112
119
soc .close ()
113
120
@@ -121,13 +128,12 @@ def send_heartbeat():
121
128
122
129
s = socket .socket ()
123
130
print ("Socket successfully created for Heartbeat" )
124
- port = HEARTBEAT_PORT
131
+ port = storage_ip [ STORAGE_ID ][ "port" ]
125
132
s .bind (('' , port ))
126
133
print ("Socket binded to %s" % (port ))
127
134
128
135
# put the socket into listening mode
129
136
s .listen (5 )
130
- print ("Socket is listening" )
131
137
132
138
while True :
133
139
# Establish connection with client.
@@ -154,9 +160,29 @@ def send_heartbeat():
154
160
os ._exit (1 )
155
161
156
162
157
- if __name__ == "__main__" :
163
def read_write_pg_replication():
    """Listen for replication requests from other nodes and serve them forever.

    Opens a listening socket on this node's configured port plus 10 and
    passes it to ``replicate_pg`` in an endless loop. The function only
    returns by raising; in that case the listening socket is closed on the
    way out instead of being leaked.

    NOTE(review): ``replicate_pg`` is imported from ``storage_replication``
    and presumably accepts one connection per call on the socket it is
    given -- confirm against that module.
    """
    # The context manager guarantees the socket is released if bind(),
    # listen() or replicate_pg() raises.
    with socket.socket() as s:
        print("Socket successfully created for Read-Write only for replication")

        # As mentioned in the info file, read/write requests for
        # replication are sent on "port + 10" of the node's base port.
        port = storage_ip[STORAGE_ID]["port"] + 10
        s.bind(('', port))
        print("Socket binded to %s" % (port))

        # Put the socket into listening mode (backlog of 5 pending connections).
        s.listen(5)

        while True:
            replicate_pg(s)
180
+
158
181
182
def _run_main_osd_gossip_cum_recovery():
    """Start the node's three background workers, each on its own thread.

    The workers are, in start order: ``send_heartbeat``, ``recv_gossip``
    and ``read_write_pg_replication``. All thread objects are created
    before any of them is started. The threads are not joined here.
    """
    targets = (send_heartbeat, recv_gossip, read_write_pg_replication)
    workers = [threading.Thread(target=job) for job in targets]

    for worker in workers:
        worker.start()
0 commit comments