Skip to content
This repository was archived by the owner on Jul 16, 2020. It is now read-only.

Commit a59ae91

Browse files
committed
ciao-launcher: Fix potential deadlock at launcher startup
Ensure that the connection to the server and overseer is ready before processing commands. This avoids a race where ConnectNotify() was returning a command before Dial() (and its follow-up work) was completed. This would result in a deadlock as the overseer was not ready to receive the command. Fixes: #591 Signed-off-by: Rob Bradford <[email protected]>
1 parent c7116c3 commit a59ae91

File tree

1 file changed

+41
-40
lines changed

1 file changed

+41
-40
lines changed

ciao-launcher/main.go

Lines changed: 41 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package main
1818

1919
import (
20+
"context"
2021
"flag"
2122
"fmt"
2223
"log"
@@ -29,8 +30,6 @@ import (
2930
"syscall"
3031
"time"
3132

32-
"context"
33-
3433
"github.com/01org/ciao/osprepare"
3534
"github.com/01org/ciao/payloads"
3635
"github.com/01org/ciao/ssntp"
@@ -390,50 +389,52 @@ func connectToServer(doneCh chan struct{}, statusCh chan struct{}) {
390389
dialCh <- err
391390
}()
392391

393-
dialing := true
392+
select {
393+
case err := <-dialCh:
394+
if err != nil {
395+
break
396+
}
397+
clusterConfig, err := client.conn.ClusterConfiguration()
398+
if err != nil {
399+
glog.Errorf("Unable to get Cluster Configuration %v", err)
400+
client.conn.Close()
401+
break
402+
}
403+
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
404+
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
405+
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
406+
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
407+
if secretPath == "" {
408+
secretPath = clusterConfig.Configure.Storage.SecretPath
409+
}
410+
if cephID == "" {
411+
cephID = clusterConfig.Configure.Storage.CephID
412+
}
413+
printClusterConfig()
394414

395-
DONE:
396-
for {
397-
select {
398-
case err := <-dialCh:
399-
dialing = false
400-
if err != nil {
401-
break DONE
402-
}
403-
clusterConfig, err := client.conn.ClusterConfiguration()
404-
if err != nil {
405-
glog.Errorf("Unable to get Cluster Configuration %v", err)
406-
client.conn.Close()
407-
break DONE
408-
}
409-
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
410-
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
411-
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
412-
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
413-
if secretPath == "" {
414-
secretPath = clusterConfig.Configure.Storage.SecretPath
415-
}
416-
if cephID == "" {
417-
cephID = clusterConfig.Configure.Storage.CephID
418-
}
419-
printClusterConfig()
415+
client.installLauncherDeps()
420416

421-
client.installLauncherDeps()
417+
err = startNetwork(doneCh)
418+
if err != nil {
419+
glog.Errorf("Failed to start network: %v\n", err)
420+
client.conn.Close()
421+
break
422+
}
423+
defer shutdownNetwork()
422424

423-
err = startNetwork(doneCh)
424-
if err != nil {
425-
glog.Errorf("Failed to start network: %v\n", err)
426-
client.conn.Close()
427-
break DONE
428-
}
429-
defer shutdownNetwork()
425+
ovsCh = startOverseer(&wg, client)
426+
case <-doneCh:
427+
client.conn.Close()
428+
<-dialCh
429+
return
430+
}
430431

431-
ovsCh = startOverseer(&wg, client)
432+
DONE:
433+
for {
434+
select {
432435
case <-doneCh:
433436
client.conn.Close()
434-
if !dialing {
435-
break DONE
436-
}
437+
break DONE
437438
case cmd := <-client.cmdCh:
438439
/*
439440
Double check we're not quitting here. Otherwise a flood of commands

0 commit comments

Comments
 (0)