Skip to content
This repository was archived by the owner on Jul 16, 2020. It is now read-only.

Commit accc9b9

Browse files
authored
Merge pull request #601 from rbradford/fix-launcher-deadlock
Fix launcher deadlock
2 parents 439629d + a59ae91 commit accc9b9

File tree

2 files changed

+44
-45
lines changed

2 files changed

+44
-45
lines changed

ciao-launcher/doc.go

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -42,10 +42,7 @@
4242
// it instructs all child go routines to quit and waits for their exit. Note that
4343
// it only waits for 1 second. If all child go routines have failed to exit in 1
4444
// second, ciao-launcher panics. The panic is useful as it prints the stack trace of
45-
// all the running go routines, so you can see which ones are blocked. At least
46-
// this was the intention. The default behaviour of the go runtime has changed in
47-
// this regard in 1.6 so a small code change is required, but you get the idea, I
48-
// hope.
45+
// all the running go routines, so you can see which ones are blocked.
4946
//
5047
// The Server go routine
5148
//

ciao-launcher/main.go

Lines changed: 43 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -17,19 +17,19 @@
1717
package main
1818

1919
import (
20+
"context"
2021
"flag"
2122
"fmt"
2223
"log"
2324
"math"
2425
"os"
2526
"os/signal"
2627
"path"
28+
"runtime/debug"
2729
"sync"
2830
"syscall"
2931
"time"
3032

31-
"context"
32-
3333
"github.com/01org/ciao/osprepare"
3434
"github.com/01org/ciao/payloads"
3535
"github.com/01org/ciao/ssntp"
@@ -389,50 +389,52 @@ func connectToServer(doneCh chan struct{}, statusCh chan struct{}) {
389389
dialCh <- err
390390
}()
391391

392-
dialing := true
392+
select {
393+
case err := <-dialCh:
394+
if err != nil {
395+
break
396+
}
397+
clusterConfig, err := client.conn.ClusterConfiguration()
398+
if err != nil {
399+
glog.Errorf("Unable to get Cluster Configuration %v", err)
400+
client.conn.Close()
401+
break
402+
}
403+
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
404+
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
405+
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
406+
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
407+
if secretPath == "" {
408+
secretPath = clusterConfig.Configure.Storage.SecretPath
409+
}
410+
if cephID == "" {
411+
cephID = clusterConfig.Configure.Storage.CephID
412+
}
413+
printClusterConfig()
393414

394-
DONE:
395-
for {
396-
select {
397-
case err := <-dialCh:
398-
dialing = false
399-
if err != nil {
400-
break DONE
401-
}
402-
clusterConfig, err := client.conn.ClusterConfiguration()
403-
if err != nil {
404-
glog.Errorf("Unable to get Cluster Configuration %v", err)
405-
client.conn.Close()
406-
break DONE
407-
}
408-
computeNet = clusterConfig.Configure.Launcher.ComputeNetwork
409-
mgmtNet = clusterConfig.Configure.Launcher.ManagementNetwork
410-
diskLimit = clusterConfig.Configure.Launcher.DiskLimit
411-
memLimit = clusterConfig.Configure.Launcher.MemoryLimit
412-
if secretPath == "" {
413-
secretPath = clusterConfig.Configure.Storage.SecretPath
414-
}
415-
if cephID == "" {
416-
cephID = clusterConfig.Configure.Storage.CephID
417-
}
418-
printClusterConfig()
415+
client.installLauncherDeps()
419416

420-
client.installLauncherDeps()
417+
err = startNetwork(doneCh)
418+
if err != nil {
419+
glog.Errorf("Failed to start network: %v\n", err)
420+
client.conn.Close()
421+
break
422+
}
423+
defer shutdownNetwork()
421424

422-
err = startNetwork(doneCh)
423-
if err != nil {
424-
glog.Errorf("Failed to start network: %v\n", err)
425-
client.conn.Close()
426-
break DONE
427-
}
428-
defer shutdownNetwork()
425+
ovsCh = startOverseer(&wg, client)
426+
case <-doneCh:
427+
client.conn.Close()
428+
<-dialCh
429+
return
430+
}
429431

430-
ovsCh = startOverseer(&wg, client)
432+
DONE:
433+
for {
434+
select {
431435
case <-doneCh:
432436
client.conn.Close()
433-
if !dialing {
434-
break DONE
435-
}
437+
break DONE
436438
case cmd := <-client.cmdCh:
437439
/*
438440
Double check we're not quitting here. Otherwise a flood of commands
@@ -563,7 +565,7 @@ DONE:
563565
glog.Flush()
564566

565567
/* We panic here to see which naughty go routines are still running. */
566-
568+
debug.SetTraceback("all")
567569
panic("Server Loop did not exit within 1 second quitting")
568570
}
569571
}

0 commit comments

Comments
 (0)