Skip to content

Commit

Permalink
[executor] kill process using PID from OCC
Browse files Browse the repository at this point in the history
  • Loading branch information
miltalex authored and teo committed Jul 22, 2020
1 parent c77c101 commit 592c6a6
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 6 deletions.
26 changes: 20 additions & 6 deletions executor/executable/controllabletask.go
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ func (t *ControllableTask) Kill() error {

log.Debug("end transition loop done")

pid := t.rpc.TaskCmd.Process.Pid
pid := int(response.GetPid())
_ = t.rpc.Close()
t.rpc = nil

Expand All @@ -439,9 +439,10 @@ func (t *ControllableTask) Kill() error {

killErrCh := make(chan error)
// When killing we must always use syscall.Kill with a negative PID, in order to kill all
// children which were assigned the same PGID at launch
// children which were assigned the same PGID at launch. Since killing the child process
// should also terminate the shell that is wrapping the command, we avoid using a negative PID.
go func() {
err := syscall.Kill(-pid, syscall.SIGTERM)
err := syscall.Kill(pid, syscall.SIGTERM)
if err != nil {
log.WithError(err).
WithField("taskId", t.ti.GetTaskID()).
Expand All @@ -450,18 +451,31 @@ func (t *ControllableTask) Kill() error {
killErrCh <- err
}()


// Give SIGTERM a short grace period: if SIGTERM fails or the timeout passes,
// we perform a SIGKILL.
select {
case killErr := <- killErrCh:
if killErr == nil {
return killErr
time.Sleep(10 * time.Second)
if pidExists(pid) {
// SIGINT for the "Waiting for graceful device shutdown.
// Hit Ctrl-C again to abort immediately" message.
killErr = syscall.Kill(pid, syscall.SIGINT)
if killErr != nil {
log.WithError(killErr).
WithField("taskId", t.ti.GetTaskID()).
Warning("could not gracefully kill task")
}
}
time.Sleep(10 * time.Second)
if !pidExists(pid) {
return killErr
}
}
case <-time.After(10 * time.Second):
}

killErr := syscall.Kill(-pid, syscall.SIGKILL)
killErr := syscall.Kill(pid, syscall.SIGKILL)
if killErr != nil {
log.WithError(killErr).
WithField("taskId", t.ti.GetTaskID()).
Expand Down
59 changes: 59 additions & 0 deletions executor/executable/pid_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* === This file is part of ALICE O² ===
*
* Copyright 2020 CERN and copyright holders of ALICE O².
* Author: Miltiadis Alexis <miltiadis.alexis@cern.ch>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* In applying this license CERN does not waive the privileges and
* immunities granted to it by virtue of its status as an
* Intergovernmental Organization or submit itself to any jurisdiction.
*/

package executable

import (
"os"
"syscall"
)

// pidExists reports whether a process with the given PID currently exists.
//
// Non-positive PIDs are never treated as existing processes. For any other
// value the process is probed by sending it signal 0, which asks the kernel
// to run its existence and permission checks without delivering a signal.
//
// The probe result is interpreted as follows:
//   - no error: the process exists and we are allowed to signal it;
//   - syscall.EPERM: the process exists but belongs to someone else;
//   - anything else (syscall.ESRCH, the os package's "process already
//     finished" error, or any unexpected error): the process is treated as
//     not existing.
func pidExists(pid int) bool {
	if pid <= 0 {
		return false
	}
	proc, err := os.FindProcess(pid)
	if err != nil {
		return false
	}
	err = proc.Signal(syscall.Signal(0))
	// Only a clean probe or a permission failure prove that the PID is in
	// use; every other outcome is reported as "not running". Comparing against
	// the errno value avoids matching on the text of an error message.
	return err == nil || err == syscall.EPERM
}

0 comments on commit 592c6a6

Please sign in to comment.