Refactor Nomad Recovery

from an approach that loaded the runners only once at startup
to a method that is repeated, i.e., whenever the Nomad Event Stream connection is interrupted.
This commit is contained in:
Maximilian Paß
2023-10-23 14:36:14 +02:00
committed by Sebastian Serth
parent b2898f9183
commit 6b69a2d732
22 changed files with 211 additions and 120 deletions

View File

@ -3,6 +3,7 @@ package api
import (
"bytes"
"encoding/json"
"fmt"
"github.com/gorilla/mux"
"github.com/openHPI/poseidon/internal/environment"
"github.com/openHPI/poseidon/internal/nomad"
@ -19,6 +20,8 @@ import (
"testing"
)
// jobHCLBasicFormat is a fmt format string for a minimal HCL job definition
// with an empty body; its single %s verb is filled with the job ID.
const jobHCLBasicFormat = `job "%s" {}`
type EnvironmentControllerTestSuite struct {
tests.MemoryLeakTestSuite
manager *environment.ManagerHandlerMock
@ -92,10 +95,10 @@ func (s *EnvironmentControllerTestSuite) TestList() {
call.Run(func(args mock.Arguments) {
firstEnvironment, err := environment.NewNomadEnvironment(tests.DefaultEnvironmentIDAsInteger, nil,
"job \""+nomad.TemplateJobID(tests.DefaultEnvironmentIDAsInteger)+"\" {}")
fmt.Sprintf(jobHCLBasicFormat, nomad.TemplateJobID(tests.DefaultEnvironmentIDAsInteger)))
s.Require().NoError(err)
secondEnvironment, err := environment.NewNomadEnvironment(tests.DefaultEnvironmentIDAsInteger, nil,
"job \""+nomad.TemplateJobID(tests.AnotherEnvironmentIDAsInteger)+"\" {}")
fmt.Sprintf(jobHCLBasicFormat, nomad.TemplateJobID(tests.DefaultEnvironmentIDAsInteger)))
s.Require().NoError(err)
call.ReturnArguments = mock.Arguments{[]runner.ExecutionEnvironment{firstEnvironment, secondEnvironment}, nil}
})
@ -156,7 +159,7 @@ func (s *EnvironmentControllerTestSuite) TestGet() {
call.Run(func(args mock.Arguments) {
testEnvironment, err := environment.NewNomadEnvironment(tests.DefaultEnvironmentIDAsInteger, nil,
"job \""+nomad.TemplateJobID(tests.DefaultEnvironmentIDAsInteger)+"\" {}")
fmt.Sprintf(jobHCLBasicFormat, nomad.TemplateJobID(tests.DefaultEnvironmentIDAsInteger)))
s.Require().NoError(err)
call.ReturnArguments = mock.Arguments{testEnvironment, nil}
})

View File

@ -107,7 +107,7 @@ func (cw *codeOceanOutputWriter) Close(info *runner.ExitInfo) {
case errors.Is(info.Err, context.DeadlineExceeded) || errors.Is(info.Err, runner.ErrorRunnerInactivityTimeout):
cw.send(&dto.WebSocketMessage{Type: dto.WebSocketMetaTimeout})
case errors.Is(info.Err, runner.ErrOOMKilled):
cw.send(&dto.WebSocketMessage{Type: dto.WebSocketOutputError, Data: runner.ErrOOMKilled.Error()})
cw.send(&dto.WebSocketMessage{Type: dto.WebSocketOutputError, Data: dto.ErrOOMKilled.Error()})
case errors.Is(info.Err, nomad.ErrorAllocationCompleted), errors.Is(info.Err, runner.ErrDestroyedByAPIRequest):
message := "the allocation stopped as expected"
log.WithContext(cw.ctx).WithError(info.Err).Trace(message)