Fix Goroutine Leak of Nomad execute command

that was triggered when [the execution timeout got exceeded, the runner got destroyed, or the WebSocket connection to CodeOcean closed] and the Allocation did not react to the SIGQUIT within the grace period.
This commit is contained in:
Maximilian Paß
2023-09-11 10:44:04 +02:00
parent 59da36303c
commit 6159f2a045
3 changed files with 14 additions and 16 deletions

View File

@ -45,8 +45,8 @@ func (s *WebSocketTestSuite) SetupTest() {
// default execution
s.executionID = tests.DefaultExecutionID
s.runner.StoreExecution(s.executionID, &executionRequestHead)
mockAPIExecuteHead(s.apiMock)
s.runner.StoreExecution(s.executionID, &executionRequestLs)
mockAPIExecuteLs(s.apiMock)
runnerManager := &runner.ManagerMock{}
runnerManager.On("Get", s.runner.ID()).Return(s.runner, nil)
@ -66,6 +66,8 @@ func (s *WebSocketTestSuite) TestWebsocketConnectionCanBeEstablished() {
s.Require().NoError(err)
conn, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
<-time.After(tests.ShortTimeout)
err = conn.Close()
s.NoError(err)
}
@ -90,6 +92,8 @@ func (s *WebSocketTestSuite) TestWebsocketReturns400IfRequestedViaHttp() {
}
func (s *WebSocketTestSuite) TestWebsocketConnection() {
s.runner.StoreExecution(s.executionID, &executionRequestHead)
mockAPIExecuteHead(s.apiMock)
wsURL, err := s.webSocketURL("ws", s.runner.ID(), s.executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
@ -461,6 +465,7 @@ func mockAPIExecuteExitNonZero(api *nomad.ExecutorAPIMock) {
func mockAPIExecute(api *nomad.ExecutorAPIMock, request *dto.ExecutionRequest,
run func(runnerId string, ctx context.Context, command string, tty bool,
stdin io.Reader, stdout, stderr io.Writer) (int, error)) {
tests.RemoveMethodFromMock(&api.Mock, "ExecuteCommand")
call := api.On("ExecuteCommand",
mock.AnythingOfType("string"),
mock.Anything,

View File

@ -281,7 +281,10 @@ func (r *NomadJob) executeCommand(ctx context.Context, command string, privilege
stdin io.ReadWriter, stdout, stderr io.Writer, exit chan<- ExitInfo,
) {
exitCode, err := r.api.ExecuteCommand(r.id, ctx, command, true, privilegedExecution, stdin, stdout, stderr)
exit <- ExitInfo{uint8(exitCode), err}
select {
case exit <- ExitInfo{uint8(exitCode), err}:
case <-ctx.Done():
}
}
func (r *NomadJob) handleExitOrContextDone(ctx context.Context, cancelExecute context.CancelFunc,

View File

@ -219,12 +219,8 @@ func (s *ExecuteInteractivelyTestSuite) TestSendsSignalAfterTimeout() {
}
func (s *ExecuteInteractivelyTestSuite) TestDestroysRunnerAfterTimeoutAndSignal() {
s.T().Skip("ToDo: Refactor NomadJob.executeCommand. Stuck in sending to channel") // ToDo
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.mockedExecuteCommandCall.Run(func(args mock.Arguments) {
<-ctx.Done()
<-s.TestCtx.Done()
})
runnerDestroyed := false
s.runner.onDestroy = func(_ Runner) error {
@ -255,10 +251,8 @@ func (s *ExecuteInteractivelyTestSuite) TestResetTimerGetsCalled() {
}
func (s *ExecuteInteractivelyTestSuite) TestExitHasTimeoutErrorIfRunnerTimesOut() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.mockedExecuteCommandCall.Run(func(args mock.Arguments) {
<-ctx.Done()
<-s.TestCtx.Done()
}).Return(0, nil)
s.mockedTimeoutPassedCall.Return(true)
executionRequest := &dto.ExecutionRequest{}
@ -274,12 +268,8 @@ func (s *ExecuteInteractivelyTestSuite) TestExitHasTimeoutErrorIfRunnerTimesOut(
}
func (s *ExecuteInteractivelyTestSuite) TestDestroyReasonIsPassedToExecution() {
s.T().Skip("See TestDestroysRunnerAfterTimeoutAndSignal")
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
s.mockedExecuteCommandCall.Run(func(args mock.Arguments) {
<-ctx.Done()
<-s.TestCtx.Done()
}).Return(0, nil)
s.mockedTimeoutPassedCall.Return(true)
executionRequest := &dto.ExecutionRequest{}