33package pexec
44
55import (
6+ "os"
67 "os/exec"
78 "strconv"
89 "strings"
910 "syscall"
1011 "time"
1112
1213 "github.com/pkg/errors"
14+ "golang.org/x/sys/windows"
1315)
1416
1517func sigStr (sig syscall.Signal ) string {
@@ -35,75 +37,122 @@ func (p *managedProcess) sysProcAttr() (*syscall.SysProcAttr, error) {
3537 return ret , nil
3638}
3739
40+ // kill attempts to stop the managedProcess.
41+ // The boolean return value indicates whether the process was force killed or not. If the process is already done
42+ // or no longer exist, a special ProcessNotExistsError is returned.
3843func (p * managedProcess ) kill () (bool , error ) {
44+ // NOTE: the first kill attempt behavior is different on unix. If the first attempt to kill the
45+ // process results in os.ErrProcessDone on unix, no error is returned. Only if a process
46+ // is not found when trying to kill its tree or force kill it, then &ProcessNotExistsError
47+ // is returned.
48+ // On windows, we will always return this error, even when the first attempt failed.
3949 const mustForce = "This process can only be terminated forcefully"
4050 pidStr := strconv .Itoa (p .cmd .Process .Pid )
4151 p .logger .Infof ("killing process %d" , p .cmd .Process .Pid )
4252 // First let's try to ask the process to stop. If it's a console application, this is
4353 // very unlikely to work.
4454 var shouldJustForce bool
55+ // Our first attempt to gracefully close the process is taskkill. Taskkill is a windows
56+ // replacement for kill. However, windows does not implement signals in the same way
57+ // unix does, so their IPC involves "messages". Research has not shown exactly what is
58+ // the message sent by taskkill, but it is most likely WM_CLOSE (another option that I
59+ // have seen in discussions is WM_QUIT). WM_CLOSE is similar to pressing the X button on
60+ // an application window, which asks the process to shutdown, but the handling of this
61+ // "message" is up to the process. Moreover, to receive this message, a process needs to
62+ // have a "window". Since most viam modules do not have their own windows, killing them
63+ // results in a message "This process can only be terminated forcefully". However, this
64+ // line can potentially work if a module will have its own window.
4565 if out , err := exec .Command ("taskkill" , "/pid" , pidStr ).CombinedOutput (); err != nil {
4666 switch {
4767 case strings .Contains (string (out ), mustForce ):
4868 p .logger .Debug ("must force terminate process" )
69+ // if taskkill doesn't find a window to terminate the process, we will attempt to
70+ // send a "break" control event, which asks for a graceful shutdown of the whole
71+ // process group.
72+
73+ // GenerateConsoleCtrlEvent functions differently from taskkill. In particular, it
74+ // sends a "signal" to a process group that shares a console with the calling process.
75+ // Since we specify the CREATE_NEW_PROCESS_GROUP flag in SysProcAttr, this pattern
76+ // works well for us for two reasons:
77+ // a) The module still shares the console with the calling process (viam-server). If
78+ // we were to specify CREATE_NEW_CONSOLE in the creation flags, this would no longer
79+ // be the case.
80+ // b) By creating a new process group, we are safe to send the break signal to the
81+ // module. If we didn't specify the CREATE_NEW_PROCESS_GROUP flag, we would risk
82+ // shutting down viam-server as well, since they would be in the same process group.
83+ if err := windows .GenerateConsoleCtrlEvent (windows .CTRL_BREAK_EVENT , uint32 (p .cmd .Process .Pid )); err != nil {
84+ p .logger .Debugw ("sending a control break event to the process group failed with error" , "err" , err )
85+ }
4986 shouldJustForce = true
5087 case strings .Contains (string (out ), "not found" ):
51- return false , nil
88+ return false , & ProcessNotExistsError { err }
5289 default :
5390 return false , errors .Wrapf (err , "error killing process %d" , p .cmd .Process .Pid )
5491 }
5592 }
5693
57- if ! shouldJustForce {
58- // In case the process didn't stop, or left behind any orphan children in its process group,
59- // we now ask everything in the process tree to stop after a brief wait.
60- timer := time .NewTimer (p .stopWaitInterval )
61- defer timer .Stop ()
62- select {
63- case <- timer .C :
64- p .logger .Infof ("killing entire process tree %d" , p .cmd .Process .Pid )
65- if out , err := exec .Command ("taskkill" , "/t" , "/pid" , pidStr ).CombinedOutput (); err != nil {
66- switch {
67- case strings .Contains (string (out ), mustForce ):
68- p .logger .Debug ("must force terminate process tree" )
69- shouldJustForce = true
70- case strings .Contains (string (out ), "not found" ):
71- return false , nil
72- default :
73- return false , errors .Wrapf (err , "error killing process tree %d" , p .cmd .Process .Pid )
74- }
94+ // In case the process didn't stop, or left behind any orphan children in its process group,
95+ // we now ask everything in the process tree to stop after a brief wait.
96+ timer := time .NewTimer (p .stopWaitInterval )
97+ defer timer .Stop ()
98+ select {
99+ case <- timer .C :
100+ p .logger .Infof ("killing entire process tree %d" , p .cmd .Process .Pid )
101+ out , err := exec .Command ("taskkill" , "/t" , "/pid" , pidStr ).CombinedOutput ()
102+ if err != nil {
103+ switch {
104+ case strings .Contains (string (out ), mustForce ):
105+ p .logger .Debug ("must force terminate process tree" )
106+ shouldJustForce = true
107+ case strings .Contains (string (out ), "not found" ):
108+ return false , & ProcessNotExistsError {err }
109+ default :
110+ return false , errors .Wrapf (err , "error killing process tree %d" , p .cmd .Process .Pid )
75111 }
76- case <- p .managingCh :
77- timer .Stop ()
78112 }
113+ case <- p .managingCh :
114+ timer .Stop ()
79115 }
80116
81117 // Lastly, kill everything in the process tree that remains after a longer wait or now. This is
82118 // going to likely result in an "exit status 1" that we will have to interpret.
83119 // FUTURE(erd): find a way to do this better. Research has not come up with much and is
84120 // program dependent.
85- var forceKilled bool
86- if ! shouldJustForce {
87- timer2 := time .NewTimer (p .stopWaitInterval * 2 )
88- defer timer2 .Stop ()
89- select {
90- case <- timer2 .C :
91- p .logger .Infof ("force killing entire process tree %d" , p .cmd .Process .Pid )
92- if err := exec .Command ("taskkill" , "/t" , "/f" , "/pid" , pidStr ).Run (); err != nil {
93- return false , errors .Wrapf (err , "error force killing process tree %d" , p .cmd .Process .Pid )
121+
122+ // We can force kill the process group right away, if the flag is already set
123+ forceKillCommand := exec .Command ("taskkill" , "/t" , "/f" , "/pid" , pidStr )
124+ if shouldJustForce {
125+ if out , err := forceKillCommand .CombinedOutput (); err != nil {
126+ switch {
127+ case strings .Contains (string (out ), "not found" ):
128+ return false , & ProcessNotExistsError {err }
129+ default :
130+ return false , errors .Wrapf (err , "error killing process %d" , p .cmd .Process .Pid )
94131 }
95- forceKilled = true
96- case <- p .managingCh :
97- timer2 .Stop ()
98- }
99- } else {
100- if err := exec .Command ("taskkill" , "/t" , "/f" , "/pid" , pidStr ).Run (); err != nil {
101- return false , errors .Wrapf (err , "error force killing process tree %d" , p .cmd .Process .Pid )
102132 }
103- forceKilled = true
133+ return true , nil
104134 }
105135
106- return forceKilled , nil
136+ // If shouldJustForce is not set yet, we will wait on a timer to give managing channel a
137+ // final chance to close. If it doesn't, we will force kill the process tree.
138+ timer2 := time .NewTimer (p .stopWaitInterval * 2 )
139+ defer timer2 .Stop ()
140+ select {
141+ case <- timer2 .C :
142+ p .logger .Infof ("force killing entire process tree %d" , p .cmd .Process .Pid )
143+ if out , err := forceKillCommand .CombinedOutput (); err != nil {
144+ switch {
145+ case strings .Contains (string (out ), "not found" ):
146+ return false , & ProcessNotExistsError {err }
147+ default :
148+ return false , errors .Wrapf (err , "error killing process %d" , p .cmd .Process .Pid )
149+ }
150+ }
151+ return true , nil
152+ case <- p .managingCh :
153+ timer2 .Stop ()
154+ }
155+ return false , nil
107156}
108157
109158// forceKillGroup kills everything in the process tree. This will not wait for completion and may result in a zombie process.
@@ -112,3 +161,41 @@ func (p *managedProcess) forceKillGroup() error {
112161 p .logger .Infof ("force killing entire process tree %d" , p .cmd .Process .Pid )
113162 return exec .Command ("taskkill" , "/t" , "/f" , "/pid" , pidStr ).Start ()
114163}
164+
165+ // Status is a best effort method to return an os.ErrProcessDone in case the process no
166+ // longer exists.
167+ func (p * managedProcess ) status () error {
168+ p .mu .Lock ()
169+ defer p .mu .Unlock ()
170+ pid , err := p .UnixPid ()
171+ if err != nil {
172+ return err
173+ }
174+
175+ handle , err := windows .OpenProcess (windows .PROCESS_QUERY_LIMITED_INFORMATION , false , uint32 (pid ))
176+ defer windows .CloseHandle (handle )
177+ if err != nil {
178+ if err == windows .ERROR_INVALID_PARAMETER {
179+ // Bohdan: my understanding is that Invalid_Paramater is not a strong guarantee, but
180+ // it's highly likely that we can treat it as "ProcessDone".
181+ return os .ErrProcessDone
182+ }
183+ // A common error here could be Access_Denied, which would signal that the process
184+ // still exists.
185+ return err
186+ }
187+
188+ // To be extra sure, we can examine the exit code of the process handle.
189+ var exitCode uint32
190+ err = windows .GetExitCodeProcess (handle , & exitCode )
191+ if err != nil {
192+ return err
193+ }
194+ // Somehow, this constant is not defined in the windows library, but it looks like it's
195+ // a commonly used Windows constant to check that the process is still running.
196+ const STILL_ACTIVE = 259
197+ if exitCode != STILL_ACTIVE {
198+ return os .ErrProcessDone
199+ }
200+ return nil
201+ }
0 commit comments