Skip to content
This repository has been archived by the owner on Jul 16, 2020. It is now read-only.

Commit

Permalink
Merge pull request #601 from rbradford/fix-launcher-deadlock
Browse files Browse the repository at this point in the history
Fix launcher deadlock
  • Loading branch information
mcastelino authored Sep 28, 2016
2 parents 439629d + a59ae91 commit accc9b9
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 45 deletions.
5 changes: 1 addition & 4 deletions ciao-launcher/doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,7 @@
// it instructs all child go routines to quit and waits for their exit. Note that
// it only waits for 1 second. If any child go routine has not exited within 1
// second, ciao-launcher panics. The panic is useful as it prints the stack trace of
// all the running go routines, so you can see which ones are blocked. At least
// this was the intention. The default behaviour of the go runtime has changed in
// this regard in 1.6 so a small code change is required, but you get the idea, I
// hope.
// all the running go routines, so you can see which ones are blocked.
//
// The Server go routine
//
Expand Down
84 changes: 43 additions & 41 deletions ciao-launcher/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,19 @@
package main

import (
"context"
"flag"
"fmt"
"log"
"math"
"os"
"os/signal"
"path"
"runtime/debug"
"sync"
"syscall"
"time"

"context"

"github.com/01org/ciao/osprepare"
"github.com/01org/ciao/payloads"
"github.com/01org/ciao/ssntp"
Expand Down Expand Up @@ -389,50 +389,52 @@ func connectToServer(doneCh chan struct{}, statusCh chan struct{}) {
dialCh <- err
}()

dialing := true
select {
case err := <-dialCh:
if err != nil {
break
}
clusterConfig, err := client.conn.ClusterConfiguration()
if err != nil {
glog.Errorf("Unable to get Cluster Configuration %v", err)
client.conn.Close()
break
}
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
if secretPath == "" {
secretPath = clusterConfig.Configure.Storage.SecretPath
}
if cephID == "" {
cephID = clusterConfig.Configure.Storage.CephID
}
printClusterConfig()

DONE:
for {
select {
case err := <-dialCh:
dialing = false
if err != nil {
break DONE
}
clusterConfig, err := client.conn.ClusterConfiguration()
if err != nil {
glog.Errorf("Unable to get Cluster Configuration %v", err)
client.conn.Close()
break DONE
}
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
if secretPath == "" {
secretPath = clusterConfig.Configure.Storage.SecretPath
}
if cephID == "" {
cephID = clusterConfig.Configure.Storage.CephID
}
printClusterConfig()
client.installLauncherDeps()

client.installLauncherDeps()
err = startNetwork(doneCh)
if err != nil {
glog.Errorf("Failed to start network: %v\n", err)
client.conn.Close()
break
}
defer shutdownNetwork()

err = startNetwork(doneCh)
if err != nil {
glog.Errorf("Failed to start network: %v\n", err)
client.conn.Close()
break DONE
}
defer shutdownNetwork()
ovsCh = startOverseer(&wg, client)
case <-doneCh:
client.conn.Close()
<-dialCh
return
}

ovsCh = startOverseer(&wg, client)
DONE:
for {
select {
case <-doneCh:
client.conn.Close()
if !dialing {
break DONE
}
break DONE
case cmd := <-client.cmdCh:
/*
Double check we're not quitting here. Otherwise a flood of commands
Expand Down Expand Up @@ -563,7 +565,7 @@ DONE:
glog.Flush()

/* We panic here to see which naughty go routines are still running. */

debug.SetTraceback("all")
panic("Server Loop did not exit within 1 second quitting")
}
}
Expand Down

0 comments on commit accc9b9

Please sign in to comment.