Files
poseidon/internal/nomad/nomad_test.go
Maximilian Paß 19e0ae1583 Fix concurrent map write
in the Nomad `evaluations` map by replacing the simple map with our concurrency-ready storage object.
2024-04-17 13:19:49 +02:00

984 lines
37 KiB
Go

package nomad
import (
"bytes"
"context"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
"github.com/openHPI/poseidon/internal/config"
"github.com/openHPI/poseidon/pkg/nullio"
"github.com/openHPI/poseidon/pkg/storage"
"github.com/openHPI/poseidon/tests"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"io"
"regexp"
"strings"
"testing"
"time"
)
// Shared fixtures used by several tests in this file.
var (
	// noopAllocationProcessing is a callback set that does nothing: OnNew ignores
	// its arguments and OnDeleted always reports false.
	noopAllocationProcessing = &AllocationProcessing{
		OnNew:     func(*nomadApi.Allocation, time.Duration) {},
		OnDeleted: func(string, error) bool { return false },
	}
	// ErrUnexpectedEOF is the error injected into simulated event streams.
	ErrUnexpectedEOF = errors.New("unexpected EOF")
)
func TestLoadRunnersTestSuite(t *testing.T) {
suite.Run(t, new(LoadRunnersTestSuite))
}
// LoadRunnersTestSuite groups the tests of APIClient.LoadRunnerIDs.
// All fields are (re-)initialized in SetupTest.
type LoadRunnersTestSuite struct {
	tests.MemoryLeakTestSuite
	// jobID is the identifier passed to LoadRunnerIDs.
	jobID string
	// mock replaces the Nomad API querier of nomadAPIClient.
	mock           *apiQuerierMock
	nomadAPIClient APIClient
	// Job stubs in the different states the tests feed into the mocked listJobs call.
	availableRunner, anotherAvailableRunner, pendingRunner, deadRunner *nomadApi.JobListStub
}
// SetupTest creates a fresh API mock, a client using it, and one job stub per job state.
func (s *LoadRunnersTestSuite) SetupTest() {
	s.MemoryLeakTestSuite.SetupTest()

	s.jobID = tests.DefaultRunnerID

	querier := &apiQuerierMock{}
	s.mock = querier
	s.nomadAPIClient = APIClient{apiQuerier: querier}

	// Two running jobs, one pending job and one dead job.
	s.availableRunner = newJobListStub(tests.DefaultRunnerID, structs.JobStatusRunning, 1)
	s.anotherAvailableRunner = newJobListStub(tests.AnotherRunnerID, structs.JobStatusRunning, 1)
	s.pendingRunner = newJobListStub(tests.DefaultRunnerID+"-1", structs.JobStatusPending, 0)
	s.deadRunner = newJobListStub(tests.AnotherRunnerID+"-1", structs.JobStatusDead, 0)
}
// newJobListStub builds a job list entry with the given id and status whose summary
// reports amountRunning running allocations in the task group TaskGroupName.
func newJobListStub(id, status string, amountRunning int) *nomadApi.JobListStub {
	summary := &nomadApi.JobSummary{
		JobID:   id,
		Summary: map[string]nomadApi.TaskGroupSummary{TaskGroupName: {Running: amountRunning}},
	}
	return &nomadApi.JobListStub{ID: id, Status: status, JobSummary: summary}
}
// TestErrorOfUnderlyingApiCallIsPropagated checks that an error of the listJobs call
// is returned unchanged by LoadRunnerIDs and that no runner IDs are returned.
func (s *LoadRunnersTestSuite) TestErrorOfUnderlyingApiCallIsPropagated() {
	s.mock.On("listJobs", mock.AnythingOfType("string")).Return(nil, tests.ErrDefault)

	runnerIDs, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)

	s.Nil(runnerIDs)
	s.Equal(tests.ErrDefault, err)
}
// TestReturnsNoErrorWhenUnderlyingApiCallDoesNot checks that LoadRunnerIDs succeeds
// when listJobs returns an empty job list without an error.
func (s *LoadRunnersTestSuite) TestReturnsNoErrorWhenUnderlyingApiCallDoesNot() {
	noJobs := []*nomadApi.JobListStub{}
	s.mock.On("listJobs", mock.AnythingOfType("string")).Return(noJobs, nil)

	_, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)

	s.NoError(err)
}
// TestAvailableRunnerIsReturned checks that the ID of a running job is returned.
func (s *LoadRunnersTestSuite) TestAvailableRunnerIsReturned() {
	jobs := []*nomadApi.JobListStub{s.availableRunner}
	s.mock.On("listJobs", mock.AnythingOfType("string")).Return(jobs, nil)

	runnerIDs, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)

	s.Require().NoError(err)
	s.Len(runnerIDs, 1)
	s.Equal(s.availableRunner.ID, runnerIDs[0])
}
// TestPendingRunnerIsReturned checks that the ID of a pending job is returned.
func (s *LoadRunnersTestSuite) TestPendingRunnerIsReturned() {
	jobs := []*nomadApi.JobListStub{s.pendingRunner}
	s.mock.On("listJobs", mock.AnythingOfType("string")).Return(jobs, nil)

	runnerIDs, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)

	s.Require().NoError(err)
	s.Len(runnerIDs, 1)
	s.Equal(s.pendingRunner.ID, runnerIDs[0])
}
// TestDeadRunnerIsNotReturned checks that a dead job does not show up in the returned IDs.
func (s *LoadRunnersTestSuite) TestDeadRunnerIsNotReturned() {
	jobs := []*nomadApi.JobListStub{s.deadRunner}
	s.mock.On("listJobs", mock.AnythingOfType("string")).Return(jobs, nil)

	runnerIDs, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)

	s.Require().NoError(err)
	s.Empty(runnerIDs)
}
// TestReturnsAllAvailableRunners feeds running, pending and dead jobs into the mock
// and expects every job except the dead one to be returned.
func (s *LoadRunnersTestSuite) TestReturnsAllAvailableRunners() {
	allJobs := []*nomadApi.JobListStub{
		s.availableRunner,
		s.anotherAvailableRunner,
		s.pendingRunner,
		s.deadRunner,
	}
	s.mock.On("listJobs", mock.AnythingOfType("string")).Return(allJobs, nil)

	runnerIDs, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)

	s.Require().NoError(err)
	s.Len(runnerIDs, 3)
	expected := []*nomadApi.JobListStub{s.availableRunner, s.anotherAvailableRunner, s.pendingRunner}
	for _, runner := range expected {
		s.Contains(runnerIDs, runner.ID)
	}
	s.NotContains(runnerIDs, s.deadRunner.ID)
}
// Constants shared by the Nomad API client tests.
const (
	// TestNamespace is the Nomad namespace placed into the test configuration.
	TestNamespace = "unit-tests"
	// TestNomadToken is the Nomad token placed into the test configuration.
	TestNomadToken = "n0m4d-t0k3n"
	// TestDefaultAddress is the address tests pass to NomadTestConfig.
	TestDefaultAddress = "127.0.0.1"
	// evaluationID is the evaluation identifier used by the evaluation monitoring tests.
	evaluationID = "evaluation-id"
)
// NomadTestConfig returns a Nomad configuration for the given address that uses
// port 4646, the test token and namespace, and has TLS disabled.
func NomadTestConfig(address string) *config.Nomad {
	nomadConfig := &config.Nomad{
		Address:   address,
		Port:      4646,
		Token:     TestNomadToken,
		TLS:       config.TLS{Active: false},
		Namespace: TestNamespace,
	}
	return nomadConfig
}
// TestApiClient_init checks that the client initializes with the default test configuration.
func (s *MainTestSuite) TestApiClient_init() {
	apiClient := &APIClient{apiQuerier: &nomadAPIClient{}}
	nomadConfig := NomadTestConfig(TestDefaultAddress)

	err := apiClient.init(nomadConfig)

	s.Require().Nil(err)
}
// TestApiClientCanNotBeInitializedWithInvalidUrl checks that init fails when the
// configured address already carries an "http://" scheme prefix.
func (s *MainTestSuite) TestApiClientCanNotBeInitializedWithInvalidUrl() {
	apiClient := &APIClient{apiQuerier: &nomadAPIClient{}}
	invalidConfig := NomadTestConfig("http://" + TestDefaultAddress)

	err := apiClient.init(invalidConfig)

	s.NotNil(err)
}
// TestNewExecutorApiCanBeCreatedWithoutError checks that both a manually initialized
// client and NewExecutorAPI accept the default test configuration.
func (s *MainTestSuite) TestNewExecutorApiCanBeCreatedWithoutError() {
	manualClient := &APIClient{apiQuerier: &nomadAPIClient{}}
	initErr := manualClient.init(NomadTestConfig(TestDefaultAddress))
	s.Require().Nil(initErr)

	_, createErr := NewExecutorAPI(NomadTestConfig(TestDefaultAddress))
	s.Require().Nil(createErr)
}
// asynchronouslyMonitorEvaluation creates an APIClient backed by a mocked Nomad API
// and runs MonitorEvaluation for evaluationID in its own goroutine. The mocked
// EventStream call hands out the given receive-only stream. The result of
// MonitorEvaluation is delivered on the returned (unbuffered) channel.
func asynchronouslyMonitorEvaluation(stream <-chan *nomadApi.Events) chan error {
	querier := &apiQuerierMock{}
	// stream is already receive-only, so it can be returned by the mock as is.
	querier.On("EventStream", mock.AnythingOfType("*context.cancelCtx")).
		Return(stream, nil)
	client := &APIClient{querier, storage.NewLocalStorage[chan error](), storage.NewLocalStorage[*allocationData](), false}

	ctx := context.Background()
	result := make(chan error)
	go func() {
		result <- client.MonitorEvaluation(evaluationID, ctx)
	}()
	return result
}
// TestApiClient_MonitorEvaluationReturnsNilWhenStreamIsClosed checks that closing the
// event stream makes MonitorEvaluation return without an error.
func (s *MainTestSuite) TestApiClient_MonitorEvaluationReturnsNilWhenStreamIsClosed() {
	eventStream := make(chan *nomadApi.Events)
	result := asynchronouslyMonitorEvaluation(eventStream)
	close(eventStream)

	// The timeout keeps the test from hanging if closing the stream does not
	// terminate MonitorEvaluation.
	select {
	case err := <-result:
		s.Nil(err)
	case <-time.After(10 * time.Millisecond):
		s.T().Fatal("MonitorEvaluation didn't finish as expected")
	}
}
// TestApiClient_MonitorEvaluationReturnsErrorWhenStreamReturnsError checks that an error
// of the EventStream call is contained in the error returned by MonitorEvaluation.
func (s *MainTestSuite) TestApiClient_MonitorEvaluationReturnsErrorWhenStreamReturnsError() {
	querier := &apiQuerierMock{}
	querier.On("EventStream", mock.AnythingOfType("*context.cancelCtx")).
		Return(nil, tests.ErrDefault)
	client := &APIClient{querier, storage.NewLocalStorage[chan error](), storage.NewLocalStorage[*allocationData](), false}

	monitorErr := client.MonitorEvaluation("id", context.Background())

	s.ErrorIs(monitorErr, tests.ErrDefault)
}
// eventPayload is the structure that eventForEvaluation and eventForAllocation
// convert into an event's payload map via mapstructure. Only one of the two
// fields is set by each helper.
type eventPayload struct {
Evaluation *nomadApi.Evaluation
Allocation *nomadApi.Allocation
}
// eventForEvaluation takes an evaluation and creates an Event with the given evaluation
// as its payload. Nomad uses the mapstructure library to decode the payload, which we
// simply reverse here.
// The test is aborted via t.Fatalf if the evaluation cannot be converted.
func eventForEvaluation(t *testing.T, eval *nomadApi.Evaluation) nomadApi.Event {
	t.Helper()
	payload := make(map[string]interface{})
	if err := mapstructure.Decode(eventPayload{Evaluation: eval}, &payload); err != nil {
		// Fatalf stops the calling test goroutine, so no return value is needed here.
		t.Fatalf("Couldn't decode evaluation %v into payload map: %v", eval, err)
	}
	return nomadApi.Event{Topic: nomadApi.TopicEvaluation, Payload: payload}
}
// simulateNomadEventStream sends the given events one after another to stream.
// If a value arrives on errChan while sending, it stops and returns that value
// together with the number of events sent so far (the stream is not closed then).
// After all events were sent, the stream is closed and the function waits for
// either a value on errChan, which is returned, or for ctx to be done, in which
// case the returned error is nil.
func simulateNomadEventStream(
	ctx context.Context,
	stream chan<- *nomadApi.Events,
	errChan chan error,
	events []*nomadApi.Events,
) (int, error) {
	sent := 0
	for i := range events {
		select {
		case err := <-errChan:
			return sent, err
		case stream <- events[i]:
			sent++
		}
	}
	close(stream)

	// Wait until the consumer finished handling the last event.
	select {
	case err := <-errChan:
		return sent, err
	case <-ctx.Done():
		return sent, nil
	}
}
// runEvaluationMonitoring starts MonitorEvaluation in a goroutine (through
// asynchronouslyMonitorEvaluation) and feeds it the given events over a channel that
// simulates the Nomad event stream. It returns the number of events sent and the
// error reported by simulateNomadEventStream.
func runEvaluationMonitoring(ctx context.Context, events []*nomadApi.Events) (eventsProcessed int, err error) {
	eventStream := make(chan *nomadApi.Events)
	monitorResult := asynchronouslyMonitorEvaluation(eventStream)
	eventsProcessed, err = simulateNomadEventStream(ctx, eventStream, monitorResult, events)
	return eventsProcessed, err
}
// TestApiClient_MonitorEvaluationWithSuccessfulEvent streams different combinations of
// heartbeat, pending and completed evaluation events and expects MonitorEvaluation to
// finish without an error after all streamed events were consumed.
func (s *MainTestSuite) TestApiClient_MonitorEvaluationWithSuccessfulEvent() {
	completeEval := nomadApi.Evaluation{Status: structs.EvalStatusComplete}
	pendingEval := nomadApi.Evaluation{Status: structs.EvalStatusPending}

	// make sure that the tested function can complete
	s.Require().Nil(checkEvaluation(&completeEval))

	completed := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(s.T(), &completeEval)}}
	pending := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(s.T(), &pendingEval)}}
	pendingThenCompleted := nomadApi.Events{Events: []nomadApi.Event{
		eventForEvaluation(s.T(), &pendingEval),
		eventForEvaluation(s.T(), &completeEval),
	}}

	testCases := []struct {
		name                    string
		streamedEvents          []*nomadApi.Events
		expectedEventsProcessed int
	}{
		{
			name:                    "it completes with successful event",
			streamedEvents:          []*nomadApi.Events{&completed},
			expectedEventsProcessed: 1,
		},
		{
			name:                    "it keeps listening after first successful event",
			streamedEvents:          []*nomadApi.Events{&completed, &completed},
			expectedEventsProcessed: 2,
		},
		{
			name:                    "it skips heartbeat and completes",
			streamedEvents:          []*nomadApi.Events{{}, &completed},
			expectedEventsProcessed: 2,
		},
		{
			name:                    "it skips pending evaluation and completes",
			streamedEvents:          []*nomadApi.Events{&pending, &completed},
			expectedEventsProcessed: 2,
		},
		{
			name:                    "it handles multiple events per received event",
			streamedEvents:          []*nomadApi.Events{&pendingThenCompleted},
			expectedEventsProcessed: 1,
		},
	}

	for _, tc := range testCases {
		s.Run(tc.name, func() {
			processed, err := runEvaluationMonitoring(s.TestCtx, tc.streamedEvents)
			s.Nil(err)
			s.Equal(tc.expectedEventsProcessed, processed)
		})
	}
}
// TestApiClient_MonitorEvaluationWithFailingEvent streams event combinations that end
// with a failed evaluation or a stream error and expects MonitorEvaluation to return
// an error whose message contains the expected error's message.
func (s *MainTestSuite) TestApiClient_MonitorEvaluationWithFailingEvent() {
	failedEval := nomadApi.Evaluation{ID: evaluationID, Status: structs.EvalStatusFailed}
	evalErr := checkEvaluation(&failedEval)
	s.Require().NotNil(evalErr)

	pendingEval := nomadApi.Evaluation{Status: structs.EvalStatusPending}

	failed := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(s.T(), &failedEval)}}
	pending := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(s.T(), &pendingEval)}}
	pendingThenFailed := nomadApi.Events{Events: []nomadApi.Event{
		eventForEvaluation(s.T(), &pendingEval),
		eventForEvaluation(s.T(), &failedEval),
	}}
	streamError := nomadApi.Events{Err: tests.ErrDefault, Events: []nomadApi.Event{{}}}

	testCases := []struct {
		name                    string
		streamedEvents          []*nomadApi.Events
		expectedEventsProcessed int
		expectedError           error
	}{
		{
			name:                    "it fails with failing event",
			streamedEvents:          []*nomadApi.Events{&failed},
			expectedEventsProcessed: 1,
			expectedError:           evalErr,
		},
		{
			name:                    "it skips heartbeat and fail",
			streamedEvents:          []*nomadApi.Events{{}, &failed},
			expectedEventsProcessed: 2,
			expectedError:           evalErr,
		},
		{
			name:                    "it skips pending evaluation and fail",
			streamedEvents:          []*nomadApi.Events{&pending, &failed},
			expectedEventsProcessed: 2,
			expectedError:           evalErr,
		},
		{
			name:                    "it handles multiple events per received event and fails",
			streamedEvents:          []*nomadApi.Events{&pendingThenFailed},
			expectedEventsProcessed: 1,
			expectedError:           evalErr,
		},
		{
			name:                    "it fails with event error when event has error",
			streamedEvents:          []*nomadApi.Events{&streamError},
			expectedEventsProcessed: 1,
			expectedError:           tests.ErrDefault,
		},
	}

	for _, tc := range testCases {
		s.Run(tc.name, func() {
			processed, err := runEvaluationMonitoring(s.TestCtx, tc.streamedEvents)
			s.Require().NotNil(err)
			s.Contains(err.Error(), tc.expectedError.Error())
			s.Equal(tc.expectedEventsProcessed, processed)
		})
	}
}
// TestApiClient_MonitorEvaluationFailsWhenFailingToDecodeEvaluation streams an evaluation
// event with an undecodable payload and expects MonitorEvaluation to return an error.
func (s *MainTestSuite) TestApiClient_MonitorEvaluationFailsWhenFailingToDecodeEvaluation() {
	// This should fail decoding, as Evaluation.Status is expected to be a string, not int
	brokenPayload := map[string]interface{}{"Evaluation": map[string]interface{}{"Status": 1}}
	brokenEvent := nomadApi.Event{Topic: nomadApi.TopicEvaluation, Payload: brokenPayload}

	// Precondition: the event really cannot be decoded.
	_, decodeErr := brokenEvent.Evaluation()
	s.Require().NotNil(decodeErr)

	streamed := []*nomadApi.Events{{Events: []nomadApi.Event{brokenEvent}}}
	processed, err := runEvaluationMonitoring(s.TestCtx, streamed)

	s.Error(err)
	s.Equal(1, processed)
}
// TestCheckEvaluationWithFailedAllocations checks that the error of checkEvaluation for a
// failed evaluation mentions the status and the failed allocation metric, and that the
// message differs depending on whether a blocked evaluation is set.
func (s *MainTestSuite) TestCheckEvaluationWithFailedAllocations() {
	const metricKey = "test1"
	failedMetrics := map[string]*nomadApi.AllocationMetric{
		metricKey: {NodesExhausted: 1},
	}
	eval := nomadApi.Evaluation{FailedTGAllocs: failedMetrics, Status: structs.EvalStatusFailed}

	assertMessage := func(message string) {
		s.Contains(message, eval.Status, "error should contain the evaluation status")
		expectedMetric := fmt.Sprintf("%s: %#v", metricKey, failedMetrics[metricKey])
		s.Contains(message, expectedMetric, "error should contain the failed allocations metric")
	}

	var plainMessage, blockedMessage string

	s.Run("without blocked eval", func() {
		err := checkEvaluation(&eval)
		s.Require().NotNil(err)
		plainMessage = err.Error()
		assertMessage(plainMessage)
	})

	s.Run("with blocked eval", func() {
		eval.BlockedEval = "blocking-eval"
		err := checkEvaluation(&eval)
		s.Require().NotNil(err)
		blockedMessage = err.Error()
		assertMessage(blockedMessage)
	})

	s.NotEqual(blockedMessage, plainMessage)
}
// TestCheckEvaluationWithoutFailedAllocations checks that checkEvaluation accepts a
// complete evaluation and reports an error naming the status for every other status.
func (s *MainTestSuite) TestCheckEvaluationWithoutFailedAllocations() {
	eval := nomadApi.Evaluation{FailedTGAllocs: make(map[string]*nomadApi.AllocationMetric)}

	s.Run("when evaluation status complete", func() {
		eval.Status = structs.EvalStatusComplete
		s.Nil(checkEvaluation(&eval))
	})

	s.Run("when evaluation status not complete", func() {
		for _, status := range []string{
			structs.EvalStatusFailed,
			structs.EvalStatusCancelled,
			structs.EvalStatusBlocked,
			structs.EvalStatusPending,
		} {
			eval.Status = status
			err := checkEvaluation(&eval)
			s.Require().NotNil(err)
			s.Contains(err.Error(), status, "error should contain the evaluation status")
		}
	})
}
// TestApiClient_WatchAllocationsIgnoresOldAllocations streams events of allocations
// whose modify time lies in the past and expects that no callback is invoked.
func (s *MainTestSuite) TestApiClient_WatchAllocationsIgnoresOldAllocations() {
	stopped := createOldAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusStop)
	pending := createOldAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
	running := createOldAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)

	oldEvents := nomadApi.Events{Events: []nomadApi.Event{
		eventForAllocation(s.T(), stopped),
		eventForAllocation(s.T(), pending),
		eventForAllocation(s.T(), running),
	}}

	var (
		noNewAllocations     []*nomadApi.Allocation
		noDeletedAllocations []string
	)
	assertWatchAllocation(s, []*nomadApi.Events{&oldEvents}, noNewAllocations, noDeletedAllocations)
}
// createOldAllocation returns an allocation with the given statuses whose modify
// time is one minute in the past.
func createOldAllocation(clientStatus, desiredStatus string) *nomadApi.Allocation {
	oneMinuteAgo := time.Now().Add(-time.Minute)
	return createAllocation(oneMinuteAgo.UnixNano(), clientStatus, desiredStatus)
}
// TestApiClient_WatchAllocationsIgnoresUnhandledEvents streams a node event and expects
// that no allocation callback is invoked.
func (s *MainTestSuite) TestApiClient_WatchAllocationsIgnoresUnhandledEvents() {
	nodeEvent := nomadApi.Event{Topic: nomadApi.TopicNode, Type: structs.TypeNodeEvent}
	nodeEvents := nomadApi.Events{Events: []nomadApi.Event{nodeEvent}}

	var (
		noNewAllocations     []*nomadApi.Allocation
		noDeletedAllocations []string
	)
	assertWatchAllocation(s, []*nomadApi.Events{&nodeEvents}, noNewAllocations, noDeletedAllocations)
}
// TestApiClient_WatchAllocationsUsesCallbacksForEvents streams sequences of allocation
// events and asserts, per scenario, which allocations are reported through the OnNew
// callback and which job IDs are reported through the OnDeleted callback.
func (s *MainTestSuite) TestApiClient_WatchAllocationsUsesCallbacksForEvents() {
	pendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
	pendingEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(s.T(), pendingAllocation)}}

	s.Run("it does not add allocation when client status is pending", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingEvents}, []*nomadApi.Allocation(nil), []string(nil))
	})

	startedAllocation := createRecentAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)
	startedEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(s.T(), startedAllocation)}}
	pendingStartedEvents := nomadApi.Events{Events: []nomadApi.Event{
		eventForAllocation(s.T(), pendingAllocation), eventForAllocation(s.T(), startedAllocation)}}

	s.Run("it adds allocation with matching events", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartedEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string(nil))
	})

	s.Run("it skips heartbeat and adds allocation with matching events", func() {
		// The leading empty Events value is the heartbeat this scenario is about.
		assertWatchAllocation(s, []*nomadApi.Events{{}, &pendingStartedEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string(nil))
	})

	stoppedAllocation := createRecentAllocation(structs.AllocClientStatusComplete, structs.AllocDesiredStatusStop)
	stoppedEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(s.T(), stoppedAllocation)}}
	pendingStartStopEvents := nomadApi.Events{Events: []nomadApi.Event{
		eventForAllocation(s.T(), pendingAllocation),
		eventForAllocation(s.T(), startedAllocation),
		eventForAllocation(s.T(), stoppedAllocation),
	}}

	s.Run("it adds and deletes the allocation", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartStopEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string{stoppedAllocation.JobID})
	})

	s.Run("it ignores duplicate events", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingEvents, &startedEvents, &startedEvents,
			&stoppedEvents, &stoppedEvents, &stoppedEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string{startedAllocation.JobID})
	})

	s.Run("it ignores events of unknown allocations", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&startedEvents, &startedEvents,
			&stoppedEvents, &stoppedEvents, &stoppedEvents}, []*nomadApi.Allocation(nil), []string(nil))
	})

	s.Run("it removes restarted allocations", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartedEvents, &pendingStartedEvents},
			[]*nomadApi.Allocation{startedAllocation, startedAllocation}, []string{startedAllocation.JobID})
	})

	// Both states of the rescheduled allocation share the new ID and point back to
	// the allocation they replace.
	rescheduleAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
	rescheduleAllocation.ID = tests.AnotherUUID
	rescheduleAllocation.PreviousAllocation = pendingAllocation.ID
	rescheduleStartedAllocation := createRecentAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)
	rescheduleStartedAllocation.ID = tests.AnotherUUID
	rescheduleStartedAllocation.PreviousAllocation = pendingAllocation.ID
	rescheduleEvents := nomadApi.Events{Events: []nomadApi.Event{
		eventForAllocation(s.T(), rescheduleAllocation), eventForAllocation(s.T(), rescheduleStartedAllocation)}}

	s.Run("it removes rescheduled allocations", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartedEvents, &rescheduleEvents},
			[]*nomadApi.Allocation{startedAllocation, rescheduleStartedAllocation}, []string{startedAllocation.JobID})
	})

	stoppedPendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusStop)
	stoppedPendingEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(s.T(), stoppedPendingAllocation)}}

	s.Run("it does not callback for stopped pending allocations", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingEvents, &stoppedPendingEvents},
			[]*nomadApi.Allocation(nil), []string(nil))
	})

	failedAllocation := createRecentAllocation(structs.AllocClientStatusFailed, structs.AllocDesiredStatusStop)
	failedEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(s.T(), failedAllocation)}}

	s.Run("it removes stopped failed allocations", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartedEvents, &failedEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string{failedAllocation.JobID})
	})

	lostAllocation := createRecentAllocation(structs.AllocClientStatusLost, structs.AllocDesiredStatusStop)
	lostEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(s.T(), lostAllocation)}}

	s.Run("it removes stopped lost allocations", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartedEvents, &lostEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string{lostAllocation.JobID})
	})

	// A lost allocation that already has a successor is not reported as deleted.
	rescheduledLostAllocation := createRecentAllocation(structs.AllocClientStatusLost, structs.AllocDesiredStatusStop)
	rescheduledLostAllocation.NextAllocation = tests.AnotherUUID
	rescheduledLostEvents := nomadApi.Events{Events: []nomadApi.Event{
		eventForAllocation(s.T(), rescheduledLostAllocation)}}

	s.Run("it removes lost allocations not before the last restart attempt", func() {
		assertWatchAllocation(s, []*nomadApi.Events{&pendingStartedEvents, &rescheduledLostEvents},
			[]*nomadApi.Allocation{startedAllocation}, []string(nil))
	})
}
// TestHandleAllocationEventBuffersPendingAllocation checks that handleAllocationEvent
// stores a new pending allocation in the allocation storage, both for the event type
// produced by eventForAllocation and for a PlanResult event.
func (s *MainTestSuite) TestHandleAllocationEventBuffersPendingAllocation() {
	// runCase executes one subtest; prepare may adjust the event before it is handled.
	runCase := func(name string, prepare func(event *nomadApi.Event)) {
		s.Run(name, func() {
			pending := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
			event := eventForAllocation(s.T(), pending)
			prepare(&event)

			store := storage.NewLocalStorage[*allocationData]()
			err := handleAllocationEvent(time.Now().UnixNano(), store, &event, noopAllocationProcessing)
			s.Require().NoError(err)

			_, found := store.Get(pending.ID)
			s.True(found)
		})
	}

	runCase("AllocationUpdated", func(*nomadApi.Event) {})
	runCase("PlanResult", func(event *nomadApi.Event) {
		event.Type = structs.TypePlanResult
	})
}
// TestHandleAllocationEvent_RegressionTest_14_09_2023 replays a recorded sequence of
// allocation events for one job (allocation a1 rescheduled to a2, then to a3) and
// expects that the job is tracked as idle runner once all events were handled.
func (s *MainTestSuite) TestHandleAllocationEvent_RegressionTest_14_09_2023() {
	const (
		jobID = "29-6f04b525-5315-11ee-af32-fa163e079f19"
		a1ID  = "04d86250-550c-62f9-9a21-ecdc3b38773e"
		a2ID  = "102f282f-376a-1453-4d3d-7d4e32046acd"
		a3ID  = "0d8a8ece-cf52-2968-5a9f-e972a4150a6e"
	)

	// newAlloc creates a recent allocation of the job with the given links and statuses.
	newAlloc := func(id, previous, next, clientStatus, desiredStatus string) *nomadApi.Allocation {
		alloc := createRecentAllocation(clientStatus, desiredStatus)
		alloc.ID = id
		alloc.PreviousAllocation = previous
		alloc.NextAllocation = next
		alloc.JobID = jobID
		return alloc
	}

	a1Starting := newAlloc(a1ID, "", "", structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)

	// With this event the job is added to the idle runners
	a1Running := newAlloc(a1ID, "", "", structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)

	// With this event the job is removed from the idle runners
	a2Starting := newAlloc(a2ID, a1ID, "", structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)

	// Because the runner is neither an idle runner nor an used runner, this event triggered the now removed
	// race condition handling that led to neither removing a2 from the allocations nor adding a3 to the allocations.
	a3Starting := newAlloc(a3ID, a2ID, "", structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)

	// a2Stopping was not ignored and led to an unexpected allocation stopping.
	a2Stopping := newAlloc(a2ID, a1ID, a3ID, structs.AllocClientStatusPending, structs.AllocDesiredStatusStop)

	// a2Complete was not ignored (wrong behavior).
	a2Complete := newAlloc(a2ID, a1ID, a3ID, structs.AllocClientStatusComplete, structs.AllocDesiredStatusStop)

	// a3Running was ignored because it was unknown (wrong behavior).
	a3Running := newAlloc(a3ID, a2ID, "", structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)

	recorded := []*nomadApi.Events{{Events: []nomadApi.Event{
		eventForAllocation(s.T(), a1Starting),
		eventForAllocation(s.T(), a1Running),
		eventForAllocation(s.T(), a2Starting),
		eventForAllocation(s.T(), a3Starting),
		eventForAllocation(s.T(), a2Stopping),
		eventForAllocation(s.T(), a2Complete),
		eventForAllocation(s.T(), a3Running),
	}}}

	// idleRunners tracks the jobs reported as new and not yet deleted.
	idleRunners := make(map[string]bool)
	tracking := &AllocationProcessing{
		OnNew: func(alloc *nomadApi.Allocation, _ time.Duration) {
			idleRunners[alloc.JobID] = true
		},
		OnDeleted: func(deletedJobID string, _ error) bool {
			_, known := idleRunners[deletedJobID]
			delete(idleRunners, deletedJobID)
			return !known
		},
	}

	_, err := runAllocationWatching(s, recorded, tracking)
	s.NoError(err)
	s.True(idleRunners[jobID])
}
// TestHandleAllocationEvent_ReportsOOMKilledStatus handles the event of a known,
// restarted allocation whose task event carries "oom_killed": "true" and expects
// OnDeleted to be called with ErrorOOMKilled as reason.
func (s *MainTestSuite) TestHandleAllocationEvent_ReportsOOMKilledStatus() {
	restarted := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
	oomEvent := &nomadApi.TaskEvent{Details: map[string]string{"oom_killed": "true"}}
	restarted.TaskStates = map[string]*nomadApi.TaskState{
		TaskName: {Restarts: 1, Events: []*nomadApi.TaskEvent{oomEvent}},
	}
	event := eventForAllocation(s.T(), restarted)

	// The allocation has to be known already.
	store := storage.NewLocalStorage[*allocationData]()
	store.Add(restarted.ID, &allocationData{jobID: restarted.JobID})

	var deletionReason error
	callbacks := &AllocationProcessing{
		OnNew: func(_ *nomadApi.Allocation, _ time.Duration) {},
		OnDeleted: func(_ string, reason error) bool {
			deletionReason = reason
			return true
		},
	}

	err := handleAllocationEvent(time.Now().UnixNano(), store, &event, callbacks)

	s.Require().NoError(err)
	s.ErrorIs(deletionReason, ErrorOOMKilled)
}
// TestAPIClient_WatchAllocationsReturnsErrorWhenAllocationStreamCannotBeRetrieved checks
// that an error of the EventStream call is contained in the error of WatchEventStream.
func (s *MainTestSuite) TestAPIClient_WatchAllocationsReturnsErrorWhenAllocationStreamCannotBeRetrieved() {
	querier := &apiQuerierMock{}
	querier.On("EventStream", mock.Anything).Return(nil, tests.ErrDefault)
	client := &APIClient{querier, storage.NewLocalStorage[chan error](), storage.NewLocalStorage[*allocationData](), false}

	watchErr := client.WatchEventStream(context.Background(), noopAllocationProcessing)

	s.ErrorIs(watchErr, tests.ErrDefault)
}
// TestAPIClient_WatchAllocations streams an allocation event with an undecodable payload
// followed by a heartbeat. It expects an error after the first streamed event, i.e.
// without the second one being received.
func (s *MainTestSuite) TestAPIClient_WatchAllocations() {
	// This should fail decoding, as Allocation.ID is expected to be a string, not int
	brokenPayload := map[string]interface{}{"Allocation": map[string]interface{}{"ID": 1}}
	brokenEvent := nomadApi.Event{
		Type:    structs.TypeAllocationUpdated,
		Topic:   nomadApi.TopicAllocation,
		Payload: brokenPayload,
	}

	// Precondition: the event really cannot be decoded.
	_, decodeErr := brokenEvent.Allocation()
	s.Require().Error(decodeErr)

	streamed := []*nomadApi.Events{{Events: []nomadApi.Event{brokenEvent}}, {}}
	processed, err := runAllocationWatching(s, streamed, noopAllocationProcessing)

	s.Error(err)
	s.Equal(1, processed)
}
// TestAPIClient_WatchAllocationsReturnsErrorOnUnexpectedEOF streams an Events value
// carrying ErrUnexpectedEOF followed by a heartbeat and expects an error after the
// first streamed event.
func (s *MainTestSuite) TestAPIClient_WatchAllocationsReturnsErrorOnUnexpectedEOF() {
	streamed := []*nomadApi.Events{{Err: ErrUnexpectedEOF}, {}}

	processed, err := runAllocationWatching(s, streamed, noopAllocationProcessing)

	s.Error(err)
	s.Equal(1, processed)
}
// assertWatchAllocation streams the given events through runAllocationWatching while
// recording every allocation passed to OnNew and every job ID passed to OnDeleted.
// It asserts that no error occurred, that all events were sent, and that the recorded
// values equal the expected ones. The recording OnDeleted callback always returns false.
func assertWatchAllocation(s *MainTestSuite, events []*nomadApi.Events,
	expectedNewAllocations []*nomadApi.Allocation, expectedDeletedAllocations []string) {
	s.T().Helper()

	var (
		created []*nomadApi.Allocation
		deleted []string
	)
	recorder := &AllocationProcessing{
		OnNew: func(alloc *nomadApi.Allocation, _ time.Duration) {
			created = append(created, alloc)
		},
		OnDeleted: func(jobID string, _ error) bool {
			deleted = append(deleted, jobID)
			return false
		},
	}

	processed, err := runAllocationWatching(s, events, recorder)

	s.NoError(err)
	s.Equal(len(events), processed)
	s.Equal(expectedNewAllocations, created)
	s.Equal(expectedDeletedAllocations, deleted)
}
// runAllocationWatching starts WatchEventStream in a goroutine (through
// asynchronouslyWatchAllocations) with the given callbacks and feeds it the given
// events over a channel that simulates the Nomad event stream. It returns the number
// of events sent and the error reported by simulateNomadEventStream.
func runAllocationWatching(s *MainTestSuite, events []*nomadApi.Events, callbacks *AllocationProcessing) (
	eventsProcessed int, err error) {
	s.T().Helper()
	eventStream := make(chan *nomadApi.Events)
	watchResult := asynchronouslyWatchAllocations(eventStream, callbacks)
	eventsProcessed, err = simulateNomadEventStream(s.TestCtx, eventStream, watchResult, events)
	return eventsProcessed, err
}
// asynchronouslyWatchAllocations creates an APIClient backed by a mocked Nomad API
// and runs WatchEventStream with the given callbacks in its own goroutine. The mocked
// EventStream call hands out a receive-only view of the given stream. The result of
// WatchEventStream is delivered on the returned (unbuffered) channel.
func asynchronouslyWatchAllocations(stream chan *nomadApi.Events, callbacks *AllocationProcessing) chan error {
	ctx := context.Background()

	// The mock has to return the receive-only channel type of the EventStream call.
	var receiveOnly <-chan *nomadApi.Events = stream

	querier := &apiQuerierMock{}
	querier.On("EventStream", ctx).Return(receiveOnly, nil)
	client := &APIClient{querier, storage.NewLocalStorage[chan error](), storage.NewLocalStorage[*allocationData](), false}

	result := make(chan error)
	go func() {
		result <- client.WatchEventStream(ctx, callbacks)
	}()
	return result
}
// eventForAllocation takes an allocation and creates an AllocationUpdated Event on the
// allocation topic with the given allocation as its payload. Nomad uses the mapstructure
// library to decode the payload, which we simply reverse here.
// The test is aborted via t.Fatalf if the allocation cannot be converted.
func eventForAllocation(t *testing.T, alloc *nomadApi.Allocation) nomadApi.Event {
	t.Helper()
	payload := make(map[string]interface{})
	if err := mapstructure.Decode(eventPayload{Allocation: alloc}, &payload); err != nil {
		// Fatalf stops the calling test goroutine, so no return value is needed here.
		t.Fatalf("Couldn't decode allocation %v into payload map: %v", alloc, err)
	}
	return nomadApi.Event{
		Topic:   nomadApi.TopicAllocation,
		Type:    structs.TypeAllocationUpdated,
		Payload: payload,
	}
}
// createAllocation returns an allocation with the default test UUID and runner ID
// that carries the given modify time (Unix nanoseconds) and statuses.
func createAllocation(modifyTime int64, clientStatus, desiredStatus string) *nomadApi.Allocation {
	alloc := &nomadApi.Allocation{
		ID:            tests.DefaultUUID,
		JobID:         tests.DefaultRunnerID,
		ModifyTime:    modifyTime,
		ClientStatus:  clientStatus,
		DesiredStatus: desiredStatus,
	}
	return alloc
}
// createRecentAllocation returns an allocation with the given statuses whose modify
// time is one minute in the future.
func createRecentAllocation(clientStatus, desiredStatus string) *nomadApi.Allocation {
	inOneMinute := time.Now().Add(time.Minute)
	return createAllocation(inOneMinute.UnixNano(), clientStatus, desiredStatus)
}
func TestExecuteCommandTestSuite(t *testing.T) {
suite.Run(t, new(ExecuteCommandTestSuite))
}
// ExecuteCommandTestSuite groups the tests of APIClient.ExecuteCommand.
// All fields are (re-)initialized in SetupTest.
type ExecuteCommandTestSuite struct {
	tests.MemoryLeakTestSuite
	// allocationID identifies the allocation the command is executed in.
	allocationID string
	ctx          context.Context
	// testCommand is the command passed to ExecuteCommand; the expected outputs are
	// what the mocked Execute call writes to the respective stream.
	testCommand, expectedStdout, expectedStderr string
	// apiMock replaces the Nomad API querier of nomadAPIClient.
	apiMock        *apiQuerierMock
	nomadAPIClient APIClient
}
// SetupTest resets the fixture values and creates a fresh API mock with a client using it.
func (s *ExecuteCommandTestSuite) SetupTest() {
	s.MemoryLeakTestSuite.SetupTest()

	s.ctx = context.Background()
	s.allocationID = "test-allocation-id"
	s.testCommand = "echo \"do nothing\""
	s.expectedStdout = "stdout"
	s.expectedStderr = "stderr"

	querier := &apiQuerierMock{}
	s.apiMock = querier
	s.nomadAPIClient = APIClient{apiQuerier: querier}
}
// withTTY is the tty flag the tests pass to ExecuteCommand and expect in the
// mocked Execute calls.
const withTTY = true
// TestWithSeparateStderr enables the interactive stderr configuration and checks that
// ExecuteCommand issues two Execute calls (one containing "mkfifo" for stderr and one
// for the wrapped command), returns the exit code of the command call and forwards
// the output of both calls to the respective writers.
func (s *ExecuteCommandTestSuite) TestWithSeparateStderr() {
	config.Config.Server.InteractiveStderr = true

	commandExitCode := 42
	stderrExitCode := 1

	var stdout, stderr bytes.Buffer
	var calledStdoutCommand, calledStderrCommand string

	// recordCall remembers the executed command and writes the matching fixture
	// output to the writer handed to the mocked Execute call.
	recordCall := func(args mock.Arguments) {
		command, isString := args.Get(2).(string)
		s.Require().True(isString)

		output := s.expectedStdout
		if strings.Contains(command, "mkfifo") {
			calledStderrCommand = command
			output = s.expectedStderr
		} else {
			calledStdoutCommand = command
		}

		target, isWriter := args.Get(5).(io.Writer)
		s.Require().True(isWriter)
		_, writeErr := target.Write([]byte(output))
		s.Require().NoError(writeErr)
	}

	// The two expectations are told apart by the type of the stdin argument.
	s.apiMock.On("Execute", s.allocationID, mock.Anything, mock.Anything, withTTY,
		mock.AnythingOfType("nullio.Reader"), mock.Anything, mock.Anything).
		Run(recordCall).Return(stderrExitCode, nil)
	s.apiMock.On("Execute", s.allocationID, mock.Anything, mock.Anything, withTTY,
		mock.AnythingOfType("*bytes.Buffer"), mock.Anything, mock.Anything).
		Run(recordCall).Return(commandExitCode, nil)

	exitCode, err := s.nomadAPIClient.ExecuteCommand(s.allocationID, s.ctx, s.testCommand, withTTY,
		UnprivilegedExecution, &bytes.Buffer{}, &stdout, &stderr)
	s.Require().NoError(err)

	s.apiMock.AssertNumberOfCalls(s.T(), "Execute", 2)
	s.Equal(commandExitCode, exitCode)

	s.Run("should wrap command in stderr wrapper", func() {
		s.Require().NotEmpty(calledStdoutCommand)
		wrapped := fmt.Sprintf(stderrWrapperCommandFormat, stderrFifoFormat, s.testCommand, stderrFifoFormat)
		pattern := strings.ReplaceAll(regexp.QuoteMeta(wrapped), "%d", "\\d*")
		pattern = strings.Replace(pattern, s.testCommand, ".*", 1)
		s.Regexp(pattern, calledStdoutCommand)
	})

	s.Run("should call correct stderr command", func() {
		s.Require().NotEmpty(calledStderrCommand)
		fifoCommand := fmt.Sprintf(stderrFifoCommandFormat, stderrFifoFormat, stderrFifoFormat, stderrFifoFormat)
		pattern := strings.ReplaceAll(regexp.QuoteMeta(fifoCommand), "%d", "\\d*")
		s.Regexp(pattern, calledStderrCommand)
	})

	s.Run("should return correct output", func() {
		s.Equal(s.expectedStdout, stdout.String())
		s.Equal(s.expectedStderr, stderr.String())
	})
}
// TestWithSeparateStderrReturnsCommandError runs ExecuteCommand with InteractiveStderr
// enabled and expects the error of the command execution (tests.ErrDefault) to be
// returned, while the execution of the stderr command (the one containing "mkfifo")
// finishes without an error.
func (s *ExecuteCommandTestSuite) TestWithSeparateStderrReturnsCommandError() {
	config.Config.Server.InteractiveStderr = true

	// The no-op run function is only a placeholder: the real one is installed below
	// because it has to reference `call` itself.
	call := s.mockExecute(mock.AnythingOfType("string"), 0, nil, func(_ mock.Arguments) {})
	call.Run(func(args mock.Arguments) {
		calledCommand, ok := args.Get(2).(string)
		s.Require().True(ok)

		if strings.Contains(calledCommand, "mkfifo") {
			// Here we defuse the data race condition of the ReturnArguments being set twice at the same time.
			time.Sleep(tests.ShortTimeout)
			call.ReturnArguments = mock.Arguments{1, nil}
		} else {
			call.ReturnArguments = mock.Arguments{1, tests.ErrDefault}
		}
	})

	_, err := s.nomadAPIClient.ExecuteCommand(s.allocationID, s.ctx, s.testCommand, withTTY, UnprivilegedExecution,
		nullio.Reader{}, io.Discard, io.Discard)
	// Same assertion style as TestWithoutSeparateStderrReturnsCommandError.
	s.ErrorIs(err, tests.ErrDefault)
}
// TestWithoutSeparateStderr runs ExecuteCommand with InteractiveStderr disabled and
// expects a single Execute call with the command produced by prepareCommandWithoutTTY.
// The mocked call writes the expected outputs to the writers at argument indices
// 5 and 6; the test checks that they arrive in the stdout and stderr buffers and
// that the mocked exit code is returned.
func (s *ExecuteCommandTestSuite) TestWithoutSeparateStderr() {
	config.Config.Server.InteractiveStderr = false
	commandExitCode := 42
	var stdoutBuf, stderrBuf bytes.Buffer

	// writeTo asserts that raw is an io.Writer and writes content to it.
	writeTo := func(raw interface{}, content string) {
		writer, isWriter := raw.(io.Writer)
		s.Require().True(isWriter)
		_, writeErr := writer.Write([]byte(content))
		s.Require().NoError(writeErr)
	}

	// mock regular call
	expectedCommand := prepareCommandWithoutTTY(s.testCommand, UnprivilegedExecution)
	s.mockExecute(expectedCommand, commandExitCode, nil, func(args mock.Arguments) {
		writeTo(args.Get(5), s.expectedStdout)
		writeTo(args.Get(6), s.expectedStderr)
	})

	exitCode, err := s.nomadAPIClient.ExecuteCommand(s.allocationID, s.ctx, s.testCommand, withTTY,
		UnprivilegedExecution, nullio.Reader{}, &stdoutBuf, &stderrBuf)
	s.Require().NoError(err)

	s.apiMock.AssertNumberOfCalls(s.T(), "Execute", 1)
	s.Equal(commandExitCode, exitCode)
	s.Equal(s.expectedStdout, stdoutBuf.String())
	s.Equal(s.expectedStderr, stderrBuf.String())
}
// TestWithoutSeparateStderrReturnsCommandError runs ExecuteCommand with InteractiveStderr
// disabled and expects the error returned by the mocked Execute call (tests.ErrDefault)
// to be passed on to the caller.
func (s *ExecuteCommandTestSuite) TestWithoutSeparateStderrReturnsCommandError() {
	config.Config.Server.InteractiveStderr = false

	noop := func(_ mock.Arguments) {}
	s.mockExecute(prepareCommandWithoutTTY(s.testCommand, UnprivilegedExecution), 1, tests.ErrDefault, noop)

	_, executeErr := s.nomadAPIClient.ExecuteCommand(s.allocationID, s.ctx, s.testCommand, withTTY,
		UnprivilegedExecution, nullio.Reader{}, io.Discard, io.Discard)
	s.ErrorIs(executeErr, tests.ErrDefault)
}
// mockExecute registers an Execute expectation on the API mock for the suite's
// allocation ID, the given command matcher and withTTY; all other arguments match
// anything. runFunc is invoked on a matching call, which then returns exitCode and
// err. The registered call is returned so that callers can adjust it further.
func (s *ExecuteCommandTestSuite) mockExecute(command interface{}, exitCode int,
	err error, runFunc func(arguments mock.Arguments)) *mock.Call {
	call := s.apiMock.On("Execute", s.allocationID, mock.Anything, command, withTTY,
		mock.Anything, mock.Anything, mock.Anything)
	call = call.Run(runFunc)
	return call.Return(exitCode, err)
}
// TestAPIClient_LoadRunnerPortMappings covers LoadRunnerPortMappings for three results
// of the mocked "allocation" query: a query error, an allocation without
// AllocatedResources, and an allocation carrying the default port mappings.
func (s *MainTestSuite) TestAPIClient_LoadRunnerPortMappings() {
	querierMock := &apiQuerierMock{}
	// The same mocked call is reused; each subtest only swaps its return values.
	allocationCall := querierMock.On("allocation", tests.DefaultRunnerID)
	client := APIClient{apiQuerier: querierMock}

	s.Run("should return error when API query fails", func() {
		allocationCall.Return(nil, tests.ErrDefault)

		mappings, loadErr := client.LoadRunnerPortMappings(tests.DefaultRunnerID)
		s.Nil(mappings)
		s.ErrorIs(loadErr, tests.ErrDefault)
	})

	s.Run("should return error when AllocatedResources is nil", func() {
		allocationCall.Return(&nomadApi.Allocation{}, nil)

		mappings, loadErr := client.LoadRunnerPortMappings(tests.DefaultRunnerID)
		s.ErrorIs(loadErr, ErrorNoAllocatedResourcesFound)
		s.Nil(mappings)
	})

	s.Run("should correctly return ports", func() {
		shared := nomadApi.AllocatedSharedResources{Ports: tests.DefaultPortMappings}
		withPorts := &nomadApi.Allocation{
			AllocatedResources: &nomadApi.AllocatedResources{Shared: shared},
		}
		allocationCall.Return(withPorts, nil)

		mappings, loadErr := client.LoadRunnerPortMappings(tests.DefaultRunnerID)
		s.NoError(loadErr)
		s.Equal(tests.DefaultPortMappings, mappings)
	})
}