-
Notifications
You must be signed in to change notification settings - Fork 29
/
Copy pathtorchft.proto
123 lines (101 loc) · 2.83 KB
/
torchft.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
syntax = "proto3";
package torchft;
import "google/protobuf/timestamp.proto";
// Request for CoordinatorService.RaftMessage.
message RaftMessageRequest {
  // The serialized Raft proto message, carried as opaque bytes.
  bytes message = 1;
}
// Response for CoordinatorService.RaftMessage. Declares no fields.
message RaftMessageResponse {}
// Describes a single node by its rank and address.
message NodeInfo {
  // Rank of the node (unsigned).
  uint64 rank = 1;

  // Address of the node.
  // NOTE(review): the expected format (e.g. host:port vs. URL) is not
  // specified in this file — confirm against the callers.
  string address = 2;
}
// Request for CoordinatorService.Info.
message InfoRequest {
  // The node issuing this request.
  NodeInfo requester = 1;
}
// Response for CoordinatorService.Info.
message InfoResponse {
  // Peer nodes reported back to the requester.
  // NOTE(review): whether the requester itself is included in this list is
  // not specified in this file — confirm against the server implementation.
  repeated NodeInfo peers = 1;
}
// RPCs exposed by the coordinator.
service CoordinatorService {
  // Carries one serialized Raft message; the response has no fields.
  rpc RaftMessage(RaftMessageRequest) returns (RaftMessageResponse);

  // Takes the requester's NodeInfo and returns a list of peer NodeInfo.
  rpc Info(InfoRequest) returns (InfoResponse);
}
// One member of a quorum. Used as the entries of Quorum.participants and as
// the requester in LighthouseQuorumRequest.
message QuorumMember {
  // Identifier of the replica this member represents.
  string replica_id = 1;

  // Address of this member.
  // NOTE(review): presumably the member's manager address — confirm.
  string address = 2;

  // Address of the store associated with this member.
  // NOTE(review): the kind of store is not specified in this file.
  string store_address = 3;

  // Step this member is at.
  // NOTE(review): presumably the training step counter — confirm.
  int64 step = 4;

  // World size reported by this member.
  uint64 world_size = 5;

  // NOTE(review): presumably signals that this member only accepts a quorum
  // that shrinks (no new members) — confirm against the lighthouse logic.
  bool shrink_only = 6;
}
// A quorum: an id, the members taking part in it, and a creation timestamp.
message Quorum {
  // Identifier of this quorum.
  int64 quorum_id = 1;

  // Members participating in this quorum.
  repeated QuorumMember participants = 2;

  // Timestamp recording when this quorum was created.
  google.protobuf.Timestamp created = 3;
}
// Request for LighthouseService.Quorum.
message LighthouseQuorumRequest {
  // The member asking for a quorum.
  QuorumMember requester = 1;
}
// Response for LighthouseService.Quorum.
message LighthouseQuorumResponse {
  // The quorum returned to the requester.
  Quorum quorum = 1;
}
// Request for LighthouseService.Heartbeat.
message LighthouseHeartbeatRequest {
  // Identifier of the replica sending the heartbeat.
  string replica_id = 1;
}
// Response for LighthouseService.Heartbeat. Declares no fields.
message LighthouseHeartbeatResponse {
}
// RPCs exposed by the lighthouse.
service LighthouseService {
  // Takes the requesting QuorumMember and returns a Quorum.
  rpc Quorum(LighthouseQuorumRequest) returns (LighthouseQuorumResponse);

  // Heartbeat keyed by replica_id; the response has no fields.
  rpc Heartbeat(LighthouseHeartbeatRequest)
      returns (LighthouseHeartbeatResponse);
}
// Request for ManagerService.Quorum.
message ManagerQuorumRequest {
  // Rank of the caller.
  // NOTE(review): presumably the rank within the replica group — confirm.
  int64 rank = 1;

  // Step the caller is at.
  int64 step = 2;

  // Checkpoint metadata supplied by the caller, as a string.
  // NOTE(review): the encoding/format of this string is not specified here.
  string checkpoint_metadata = 3;

  // NOTE(review): presumably requests a quorum that may only shrink —
  // confirm against the manager/lighthouse implementation.
  bool shrink_only = 4;

  // NOTE(review): presumably controls whether state is synchronized when the
  // job initializes — confirm against the manager implementation.
  bool init_sync = 5;
}
// Response for ManagerService.Quorum.
message ManagerQuorumResponse {
  // Identifier of the quorum.
  int64 quorum_id = 1;

  // Address of the manager to recover from.
  // NOTE(review): presumably meaningful only when recovery is needed.
  string recover_src_manager_address = 2;

  // Rank to recover from. Declared `optional`, so an unset value is
  // distinguishable from rank 0.
  optional int64 recover_src_rank = 3;

  // Ranks that recover from this replica.
  // NOTE(review): direction inferred from the field name — confirm.
  repeated int64 recover_dst_ranks = 4;

  // Address of the store.
  string store_address = 5;

  // The following three fields describe the replicas that are at the max
  // step.
  int64 max_step = 6;
  // Declared `optional`, so an unset value is distinguishable from rank 0.
  optional int64 max_rank = 7;
  int64 max_world_size = 8;

  // The following two fields describe all replicas, including those that are
  // behind.
  int64 replica_rank = 9;
  int64 replica_world_size = 10;

  // NOTE(review): presumably true when the caller must heal (recover state)
  // before continuing — confirm against the manager implementation.
  bool heal = 11;
}
// Request for ManagerService.CheckpointMetadata.
message CheckpointMetadataRequest {
  // Rank the request refers to.
  // NOTE(review): whether this is the caller's rank or the rank whose
  // metadata is wanted is not specified in this file — confirm.
  int64 rank = 1;
}
// Response for ManagerService.CheckpointMetadata.
message CheckpointMetadataResponse {
  // The checkpoint metadata, as a string.
  // NOTE(review): the encoding/format of this string is not specified here.
  string checkpoint_metadata = 1;
}
// Request for ManagerService.ShouldCommit.
message ShouldCommitRequest {
  // The caller's own commit vote.
  // NOTE(review): "vote" is inferred from the request/response pairing —
  // confirm against the manager implementation.
  bool should_commit = 1;

  // Rank of the caller.
  int64 rank = 2;

  // Step the request applies to.
  int64 step = 3;
}
// Response for ManagerService.ShouldCommit.
message ShouldCommitResponse {
  // The commit decision returned to the caller.
  // NOTE(review): presumably the combined decision across ranks — confirm.
  bool should_commit = 1;
}
// Request for ManagerService.Kill.
message KillRequest {
  // Free-form message accompanying the kill request.
  // NOTE(review): presumably a human-readable reason — confirm.
  string msg = 1;
}
// Response for ManagerService.Kill. Declares no fields.
message KillResponse {
}
// RPCs exposed by the manager.
service ManagerService {
  // Takes the caller's rank/step/checkpoint metadata and returns quorum and
  // recovery information.
  rpc Quorum(ManagerQuorumRequest) returns (ManagerQuorumResponse);

  // Returns the checkpoint metadata string for the given rank.
  rpc CheckpointMetadata(CheckpointMetadataRequest)
      returns (CheckpointMetadataResponse);

  // Takes a per-rank should_commit flag and returns a should_commit flag.
  rpc ShouldCommit(ShouldCommitRequest) returns (ShouldCommitResponse);

  // Kill request carrying a message string; the response has no fields.
  rpc Kill(KillRequest) returns (KillResponse);
}