Skip to content
This repository has been archived by the owner on Jul 16, 2020. It is now read-only.

Commit

Permalink
ciao-launcher: Fix potential deadlock at launcher startup
Browse files Browse the repository at this point in the history
Ensure that the connection to the server and overseer is ready before
processing commands.

This avoids a race where ConnectNotify() was returning a command before
Dial() (and its follow-up work) was completed. This would result in a
deadlock as the overseer was not ready to receive the command.

Fixes: #591
Signed-off-by: Rob Bradford <[email protected]>
  • Loading branch information
rbradford committed Sep 28, 2016
1 parent c7116c3 commit a59ae91
Showing 1 changed file with 41 additions and 40 deletions.
81 changes: 41 additions & 40 deletions ciao-launcher/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package main

import (
"context"
"flag"
"fmt"
"log"
Expand All @@ -29,8 +30,6 @@ import (
"syscall"
"time"

"context"

"github.com/01org/ciao/osprepare"
"github.com/01org/ciao/payloads"
"github.com/01org/ciao/ssntp"
Expand Down Expand Up @@ -390,50 +389,52 @@ func connectToServer(doneCh chan struct{}, statusCh chan struct{}) {
dialCh <- err
}()

dialing := true
select {
case err := <-dialCh:
if err != nil {
break
}
clusterConfig, err := client.conn.ClusterConfiguration()
if err != nil {
glog.Errorf("Unable to get Cluster Configuration %v", err)
client.conn.Close()
break
}
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
if secretPath == "" {
secretPath = clusterConfig.Configure.Storage.SecretPath
}
if cephID == "" {
cephID = clusterConfig.Configure.Storage.CephID
}
printClusterConfig()

DONE:
for {
select {
case err := <-dialCh:
dialing = false
if err != nil {
break DONE
}
clusterConfig, err := client.conn.ClusterConfiguration()
if err != nil {
glog.Errorf("Unable to get Cluster Configuration %v", err)
client.conn.Close()
break DONE
}
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
if secretPath == "" {
secretPath = clusterConfig.Configure.Storage.SecretPath
}
if cephID == "" {
cephID = clusterConfig.Configure.Storage.CephID
}
printClusterConfig()
client.installLauncherDeps()

client.installLauncherDeps()
err = startNetwork(doneCh)
if err != nil {
glog.Errorf("Failed to start network: %v\n", err)
client.conn.Close()
break
}
defer shutdownNetwork()

err = startNetwork(doneCh)
if err != nil {
glog.Errorf("Failed to start network: %v\n", err)
client.conn.Close()
break DONE
}
defer shutdownNetwork()
ovsCh = startOverseer(&wg, client)
case <-doneCh:
client.conn.Close()
<-dialCh
return
}

ovsCh = startOverseer(&wg, client)
DONE:
for {
select {
case <-doneCh:
client.conn.Close()
if !dialing {
break DONE
}
break DONE
case cmd := <-client.cmdCh:
/*
Double check we're not quitting here. Otherwise a flood of commands
Expand Down

0 comments on commit a59ae91

Please sign in to comment.