Skip to content

Commit a5ad159

Browse files
authored
Merge pull request #140 from Raytar/channels-refactor
Refactor and rename orderedNodeStream and receiveQueue
2 parents dc76971 + 6b8a3d1 commit a5ad159

File tree

13 files changed

+284
-608
lines changed

13 files changed

+284
-608
lines changed

async.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package gorums
33
import (
44
"context"
55

6+
"github.com/relab/gorums/ordering"
67
"google.golang.org/protobuf/reflect/protoreflect"
78
)
89

@@ -31,8 +32,8 @@ func (f *Async) Done() bool {
3132

3233
func (c Configuration) AsyncCall(ctx context.Context, d QuorumCallData) *Async {
3334
expectedReplies := len(c)
34-
md := c.newCall(d.Method)
35-
replyChan, callDone := c.newReply(md, expectedReplies)
35+
md := &ordering.Metadata{MessageID: c.getMsgID(), Method: d.Method}
36+
replyChan := make(chan response, expectedReplies)
3637

3738
for _, n := range c {
3839
msg := d.Message
@@ -43,13 +44,12 @@ func (c Configuration) AsyncCall(ctx context.Context, d QuorumCallData) *Async {
4344
continue // don't send if no msg
4445
}
4546
}
46-
n.sendQ <- gorumsStreamRequest{ctx: ctx, msg: &Message{Metadata: md, Message: msg}}
47+
n.channel.enqueue(request{ctx: ctx, msg: &Message{Metadata: md, Message: msg}}, replyChan)
4748
}
4849

4950
fut := &Async{c: make(chan struct{}, 1)}
5051

5152
go func() {
52-
defer callDone()
5353
defer close(fut.c)
5454

5555
var (
@@ -66,7 +66,7 @@ func (c Configuration) AsyncCall(ctx context.Context, d QuorumCallData) *Async {
6666
errs = append(errs, Error{r.nid, r.err})
6767
break
6868
}
69-
replies[r.nid] = r.reply
69+
replies[r.nid] = r.msg
7070
if resp, quorum = d.QuorumFunction(d.Message, replies); quorum {
7171
fut.reply, fut.err = resp, nil
7272
return

channel.go

Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
package gorums
2+
3+
import (
4+
"context"
5+
"math"
6+
"math/rand"
7+
"sync"
8+
"sync/atomic"
9+
"time"
10+
11+
"github.com/relab/gorums/ordering"
12+
"google.golang.org/grpc"
13+
"google.golang.org/grpc/codes"
14+
"google.golang.org/grpc/status"
15+
"google.golang.org/protobuf/reflect/protoreflect"
16+
)
17+
18+
type request struct {
19+
ctx context.Context
20+
msg *Message
21+
opts callOptions
22+
}
23+
24+
type response struct {
25+
nid uint32
26+
msg protoreflect.ProtoMessage
27+
err error
28+
}
29+
30+
type channel struct {
31+
sendQ chan request
32+
node *Node // needed for ID and setLastError
33+
rand *rand.Rand
34+
gorumsClient ordering.GorumsClient
35+
gorumsStream ordering.Gorums_NodeStreamClient
36+
streamMut sync.RWMutex
37+
streamBroken atomicFlag
38+
parentCtx context.Context
39+
streamCtx context.Context
40+
cancelStream context.CancelFunc
41+
responseRouter map[uint64]chan<- response
42+
responseMut sync.Mutex
43+
}
44+
45+
func newChannel(n *Node) *channel {
46+
return &channel{
47+
sendQ: make(chan request, n.mgr.opts.sendBuffer),
48+
node: n,
49+
rand: rand.New(rand.NewSource(time.Now().UnixNano())),
50+
responseRouter: make(map[uint64]chan<- response),
51+
}
52+
}
53+
54+
func (c *channel) connect(ctx context.Context, conn *grpc.ClientConn) error {
55+
var err error
56+
c.parentCtx = ctx
57+
c.streamCtx, c.cancelStream = context.WithCancel(c.parentCtx)
58+
c.gorumsClient = ordering.NewGorumsClient(conn)
59+
c.gorumsStream, err = c.gorumsClient.NodeStream(c.streamCtx)
60+
if err != nil {
61+
return err
62+
}
63+
go c.sendMsgs()
64+
go c.recvMsgs()
65+
return nil
66+
}
67+
68+
func (c *channel) routeResponse(msgID uint64, resp response) {
69+
c.responseMut.Lock()
70+
defer c.responseMut.Unlock()
71+
if ch, ok := c.responseRouter[msgID]; ok {
72+
ch <- resp
73+
delete(c.responseRouter, msgID)
74+
}
75+
}
76+
77+
func (c *channel) enqueue(req request, responseChan chan<- response) {
78+
if responseChan != nil {
79+
c.responseMut.Lock()
80+
c.responseRouter[req.msg.Metadata.MessageID] = responseChan
81+
c.responseMut.Unlock()
82+
}
83+
c.sendQ <- req
84+
}
85+
86+
func (c *channel) sendMsg(req request) (err error) {
87+
// unblock the waiting caller unless noSendWaiting is enabled
88+
defer func() {
89+
if req.opts.callType == E_Multicast || req.opts.callType == E_Unicast && !req.opts.noSendWaiting {
90+
c.routeResponse(req.msg.Metadata.MessageID, response{})
91+
}
92+
}()
93+
94+
// don't send if context is already cancelled.
95+
if req.ctx.Err() != nil {
96+
return req.ctx.Err()
97+
}
98+
99+
c.streamMut.RLock()
100+
defer c.streamMut.RUnlock()
101+
102+
done := make(chan struct{}, 1)
103+
104+
// wait for either the message to be sent, or the request context being cancelled.
105+
// if the request context was cancelled, then we most likely have a blocked stream.
106+
go func() {
107+
select {
108+
case <-done:
109+
case <-req.ctx.Done():
110+
c.cancelStream()
111+
}
112+
}()
113+
114+
err = c.gorumsStream.SendMsg(req.msg)
115+
if err != nil {
116+
c.node.setLastErr(err)
117+
c.streamBroken.set()
118+
}
119+
done <- struct{}{}
120+
121+
return err
122+
}
123+
124+
func (c *channel) sendMsgs() {
125+
var req request
126+
for {
127+
select {
128+
case <-c.parentCtx.Done():
129+
return
130+
case req = <-c.sendQ:
131+
}
132+
// return error if stream is broken
133+
if c.streamBroken.get() {
134+
err := status.Errorf(codes.Unavailable, "stream is down")
135+
c.routeResponse(req.msg.Metadata.MessageID, response{nid: c.node.ID(), msg: nil, err: err})
136+
continue
137+
}
138+
// else try to send message
139+
err := c.sendMsg(req)
140+
if err != nil {
141+
// return the error
142+
c.routeResponse(req.msg.Metadata.MessageID, response{nid: c.node.ID(), msg: nil, err: err})
143+
}
144+
}
145+
}
146+
147+
func (c *channel) recvMsgs() {
148+
for {
149+
resp := newMessage(responseType)
150+
c.streamMut.RLock()
151+
err := c.gorumsStream.RecvMsg(resp)
152+
if err != nil {
153+
c.streamBroken.set()
154+
c.streamMut.RUnlock()
155+
c.node.setLastErr(err)
156+
// attempt to reconnect
157+
c.reconnect()
158+
} else {
159+
c.streamMut.RUnlock()
160+
err := status.FromProto(resp.Metadata.GetStatus()).Err()
161+
c.routeResponse(resp.Metadata.MessageID, response{nid: c.node.ID(), msg: resp.Message, err: err})
162+
}
163+
164+
select {
165+
case <-c.parentCtx.Done():
166+
return
167+
default:
168+
}
169+
}
170+
}
171+
172+
func (c *channel) reconnect() {
173+
c.streamMut.Lock()
174+
defer c.streamMut.Unlock()
175+
backoffCfg := c.node.mgr.opts.backoff
176+
177+
var retries float64
178+
for {
179+
var err error
180+
181+
c.streamCtx, c.cancelStream = context.WithCancel(c.parentCtx)
182+
c.gorumsStream, err = c.gorumsClient.NodeStream(c.streamCtx)
183+
if err == nil {
184+
c.streamBroken.clear()
185+
return
186+
}
187+
c.cancelStream()
188+
c.node.setLastErr(err)
189+
delay := float64(backoffCfg.BaseDelay)
190+
max := float64(backoffCfg.MaxDelay)
191+
for r := retries; delay < max && r > 0; r-- {
192+
delay *= backoffCfg.Multiplier
193+
}
194+
delay = math.Min(delay, max)
195+
delay *= 1 + backoffCfg.Jitter*(rand.Float64()*2-1)
196+
select {
197+
case <-time.After(time.Duration(delay)):
198+
retries++
199+
case <-c.parentCtx.Done():
200+
return
201+
}
202+
}
203+
}
204+
205+
// atomicFlag is a boolean flag whose state is read and written with atomic
// operations. The zero value is an unset flag.
type atomicFlag struct {
	flag int32
}

// set raises the flag.
func (f *atomicFlag) set() {
	atomic.StoreInt32(&f.flag, 1)
}

// get reports whether the flag is currently raised.
func (f *atomicFlag) get() bool {
	current := atomic.LoadInt32(&f.flag)
	return current == 1
}

// clear lowers the flag.
func (f *atomicFlag) clear() {
	atomic.StoreInt32(&f.flag, 0)
}

config.go

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,6 @@ package gorums
22

33
import (
44
"fmt"
5-
6-
"github.com/relab/gorums/ordering"
75
)
86

97
// Configuration represents a static set of nodes on which quorum calls may be invoked.
@@ -52,17 +50,6 @@ func (c Configuration) Equal(b Configuration) bool {
5250
return true
5351
}
5452

55-
// newCall returns unique metadata for a method call.
56-
func (c Configuration) newCall(method string) (md *ordering.Metadata) {
57-
// Note that we just use the first node's newCall method since all nodes
58-
// associated with the same manager use the same receiveQueue instance.
59-
return c[0].newCall(method)
60-
}
61-
62-
// newReply returns a channel for receiving replies
63-
// and a done function to be called for clean up.
64-
func (c Configuration) newReply(md *ordering.Metadata, maxReplies int) (replyChan chan *gorumsStreamResult, done func()) {
65-
// Note that we just use the first node's newReply method since all nodes
66-
// associated with the same manager use the same receiveQueue instance.
67-
return c[0].newReply(md, maxReplies)
53+
func (c Configuration) getMsgID() uint64 {
54+
return c[0].mgr.getMsgID()
6855
}

correctable.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"sync"
66

7+
"github.com/relab/gorums/ordering"
78
"google.golang.org/protobuf/reflect/protoreflect"
89
)
910

@@ -82,9 +83,9 @@ type CorrectableCallData struct {
8283

8384
func (c Configuration) CorrectableCall(ctx context.Context, d CorrectableCallData) *Correctable {
8485
expectedReplies := len(c)
85-
md := c.newCall(d.Method)
86-
replyChan, callDone := c.newReply(md, expectedReplies)
86+
md := &ordering.Metadata{MessageID: c.getMsgID(), Method: d.Method}
8787

88+
replyChan := make(chan response, expectedReplies)
8889
for _, n := range c {
8990
msg := d.Message
9091
if d.PerNodeArgFn != nil {
@@ -94,14 +95,12 @@ func (c Configuration) CorrectableCall(ctx context.Context, d CorrectableCallDat
9495
continue // don't send if no msg
9596
}
9697
}
97-
n.sendQ <- gorumsStreamRequest{ctx: ctx, msg: &Message{Metadata: md, Message: msg}}
98+
n.channel.enqueue(request{ctx: ctx, msg: &Message{Metadata: md, Message: msg}}, replyChan)
9899
}
99100

100101
corr := &Correctable{donech: make(chan struct{}, 1)}
101102

102103
go func() {
103-
defer callDone()
104-
105104
var (
106105
resp protoreflect.ProtoMessage
107106
errs []Error
@@ -118,15 +117,15 @@ func (c Configuration) CorrectableCall(ctx context.Context, d CorrectableCallDat
118117
errs = append(errs, Error{r.nid, r.err})
119118
break
120119
}
121-
replies[r.nid] = r.reply
120+
replies[r.nid] = r.msg
122121
if resp, rlevel, quorum = d.QuorumFunction(d.Message, replies); quorum {
123122
if quorum {
124-
corr.set(r.reply, rlevel, nil, true)
123+
corr.set(r.msg, rlevel, nil, true)
125124
return
126125
}
127126
if rlevel > clevel {
128127
clevel = rlevel
129-
corr.set(r.reply, rlevel, nil, false)
128+
corr.set(r.msg, rlevel, nil, false)
130129
}
131130
}
132131
case <-ctx.Done():

mgr.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"fmt"
55
"log"
66
"sync"
7+
"sync/atomic"
78

89
"google.golang.org/grpc"
910
"google.golang.org/grpc/backoff"
@@ -18,8 +19,7 @@ type Manager struct {
1819
closeOnce sync.Once
1920
logger *log.Logger
2021
opts managerOptions
21-
22-
*receiveQueue
22+
nextMsgID uint64
2323
}
2424

2525
// NewManager returns a new Manager for managing connection to nodes added
@@ -28,9 +28,8 @@ type Manager struct {
2828
// You should use the `NewManager` function in the generated code instead.
2929
func NewManager(opts ...ManagerOption) *Manager {
3030
m := &Manager{
31-
lookup: make(map[uint32]*Node),
32-
receiveQueue: newReceiveQueue(),
33-
opts: newManagerOptions(),
31+
lookup: make(map[uint32]*Node),
32+
opts: newManagerOptions(),
3433
}
3534
for _, opt := range opts {
3635
opt(&m.opts)
@@ -116,7 +115,7 @@ func (m *Manager) AddNode(node *Node) error {
116115
if m.logger != nil {
117116
m.logger.Printf("connecting to %s with id %d\n", node, node.id)
118117
}
119-
if err := node.connect(m.receiveQueue, m.opts); err != nil {
118+
if err := node.connect(m); err != nil {
120119
return fmt.Errorf("connection failed for %s: %w", node, err)
121120
}
122121

@@ -126,3 +125,8 @@ func (m *Manager) AddNode(node *Node) error {
126125
m.nodes = append(m.nodes, node)
127126
return nil
128127
}
128+
129+
// getMsgID returns a unique message ID.
130+
func (m *Manager) getMsgID() uint64 {
131+
return atomic.AddUint64(&m.nextMsgID, 1)
132+
}

0 commit comments

Comments
 (0)