Add Nomad job registration with monitoring afterwards

Once a Nomad job is registered, we listen to the Nomad event stream
and return once the corresponding evaluation has finished.
This commit is contained in:
sirkrypt0
2021-05-26 12:46:54 +02:00
committed by Tobias Kantusch
parent 4c3cc0cc4c
commit f228a3e599
7 changed files with 567 additions and 35 deletions

View File

@ -1,6 +1,7 @@
package environment
import (
"context"
_ "embed"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
@ -14,9 +15,46 @@ const (
TaskNameFormat = "%s-task"
)
// defaultJobHCL holds our default job in HCL format.
// The default job is used when creating a new job and provides
// common settings that all the jobs share.
//go:embed default-job.hcl
var defaultJobHCL string
// registerJob builds a Nomad job from the manager's default job and the given
// parameters and hands it to the manager's API for registration.
// If the registration fails, that error is returned unchanged. Otherwise the
// evaluation returned by the registration is monitored and the result of the
// monitoring is returned.
func (m *NomadEnvironmentManager) registerJob(
	id string,
	prewarmingPoolSize, cpuLimit, memoryLimit uint,
	image string,
	networkAccess bool,
	exposedPorts []uint16) error {
	evaluationID, registerErr := m.api.RegisterNomadJob(
		createJob(m.defaultJob, id, prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts))
	if registerErr != nil {
		return registerErr
	}
	// The monitoring runs with a plain background context.
	return m.api.MonitorEvaluation(evaluationID, context.Background())
}
// createJob derives a job from the given default job.
// The id is used as both the ID and the name of the job. The task group and the
// task are named after the id via nomad.TaskGroupNameFormat and TaskNameFormat
// and configured with the remaining parameters.
func createJob(
	defaultJob nomadApi.Job,
	id string,
	prewarmingPoolSize, cpuLimit, memoryLimit uint,
	image string,
	networkAccess bool,
	exposedPorts []uint16) *nomadApi.Job {
	// defaultJob was passed by value, so the assignments below work on a copy of the caller's struct.
	result := &defaultJob
	result.ID, result.Name = &id, &id

	taskGroupName := fmt.Sprintf(nomad.TaskGroupNameFormat, id)
	taskName := fmt.Sprintf(TaskNameFormat, id)

	taskGroup := createTaskGroup(result, taskGroupName, prewarmingPoolSize)
	configureTask(taskGroup, taskName, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
	return result
}
func parseJob(jobHCL string) *nomadApi.Job {
config := jobspec2.ParseConfig{
Body: []byte(jobHCL),
@ -25,7 +63,7 @@ func parseJob(jobHCL string) *nomadApi.Job {
}
job, err := jobspec2.ParseWithConfig(&config)
if err != nil {
log.WithError(err).Fatal("Error parsing default Nomad job")
log.WithError(err).Fatal("Error parsing Nomad job")
return nil
}
@ -48,7 +86,7 @@ func createTaskGroup(job *nomadApi.Job, name string, prewarmingPoolSize uint) *n
func configureNetwork(taskGroup *nomadApi.TaskGroup, networkAccess bool, exposedPorts []uint16) {
if len(taskGroup.Tasks) == 0 {
// This function is only used internally and must be called after configuring the task.
// This function is only used internally and must be called as last step when configuring the task.
// This error is not recoverable.
log.Fatal("Can't configure network before task has been configured!")
}
@ -62,8 +100,8 @@ func configureNetwork(taskGroup *nomadApi.TaskGroup, networkAccess bool, exposed
} else {
networkResource = taskGroup.Networks[0]
}
// prefer "bridge" network over "host" to have an isolated network namespace with bridged interface
// instead of joining the host network namespace
// Prefer "bridge" network over "host" to have an isolated network namespace with bridged interface
// instead of joining the host network namespace.
networkResource.Mode = "bridge"
for _, portNumber := range exposedPorts {
port := nomadApi.Port{
@ -73,8 +111,8 @@ func configureNetwork(taskGroup *nomadApi.TaskGroup, networkAccess bool, exposed
networkResource.DynamicPorts = append(networkResource.DynamicPorts, port)
}
} else {
// somehow, we can't set the network mode to none in the NetworkResource on task group level
// see https://github.com/hashicorp/nomad/issues/10540
// Somehow, we can't set the network mode to none in the NetworkResource on task group level.
// See https://github.com/hashicorp/nomad/issues/10540
if task.Config == nil {
task.Config = make(map[string]interface{})
}
@ -97,11 +135,11 @@ func configureTask(
task = taskGroup.Tasks[0]
task.Name = name
}
iCpuLimit := int(cpuLimit)
iMemoryLimit := int(memoryLimit)
integerCpuLimit := int(cpuLimit)
integerMemoryLimit := int(memoryLimit)
task.Resources = &nomadApi.Resources{
CPU: &iCpuLimit,
MemoryMB: &iMemoryLimit,
CPU: &integerCpuLimit,
MemoryMB: &integerMemoryLimit,
}
if task.Config == nil {
@ -111,20 +149,3 @@ func configureTask(
configureNetwork(taskGroup, networkAccess, exposedPorts)
}
// createJob derives a job from a copy of the manager's default job.
// The id is used as both the ID and the name of the job. The task group and the
// task are named after the id via nomad.TaskGroupNameFormat and TaskNameFormat
// and configured with the remaining parameters.
func (m *NomadEnvironmentManager) createJob(
	id string,
	prewarmingPoolSize, cpuLimit, memoryLimit uint,
	image string,
	networkAccess bool,
	exposedPorts []uint16) *nomadApi.Job {
	derived := m.defaultJob
	derived.ID, derived.Name = &id, &id

	taskGroupName := fmt.Sprintf(nomad.TaskGroupNameFormat, id)
	taskGroup := createTaskGroup(&derived, taskGroupName, prewarmingPoolSize)

	taskName := fmt.Sprintf(TaskNameFormat, id)
	configureTask(taskGroup, taskName, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
	return &derived
}

View File

@ -1,11 +1,13 @@
package environment
import (
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"gitlab.hpi.de/codeocean/codemoon/poseidon/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/runner"
@ -240,8 +242,9 @@ func TestConfigureTaskWhenTaskExists(t *testing.T) {
func TestCreateJobSetsAllGivenArguments(t *testing.T) {
testJob, base := createTestJob()
apiClient := NomadEnvironmentManager{&runner.NomadRunnerManager{}, &nomad.ApiClient{}, *base}
job := apiClient.createJob(
manager := NomadEnvironmentManager{&runner.NomadRunnerManager{}, &nomad.ApiClient{}, *base}
job := createJob(
manager.defaultJob,
*testJob.ID,
uint(*testJob.TaskGroups[0].Count),
uint(*testJob.TaskGroups[0].Tasks[0].Resources.CPU),
@ -252,3 +255,55 @@ func TestCreateJobSetsAllGivenArguments(t *testing.T) {
)
assert.Equal(t, *testJob, *job)
}
// TestRegisterJobWhenNomadJobRegistrationFails checks that registerJob returns the
// error of a failed job registration and does not start monitoring an evaluation.
func TestRegisterJobWhenNomadJobRegistrationFails(t *testing.T) {
	apiMock := nomad.ExecutorApiMock{}
	expectedErr := errors.New("test error")

	apiMock.On("RegisterNomadJob", mock.AnythingOfType("*api.Job")).Return("", expectedErr)

	m := NomadEnvironmentManager{
		runnerManager: nil,
		api:           &apiMock,
		defaultJob:    nomadApi.Job{},
	}

	err := m.registerJob("id", 1, 2, 3, "image", false, []uint16{})
	assert.Equal(t, expectedErr, err)
	// registerJob monitors through MonitorEvaluation, so that is the call that must not happen.
	// The mock.Anything placeholders are required: without arguments, AssertNotCalled only
	// matches calls that were made without any arguments.
	apiMock.AssertNotCalled(t, "MonitorEvaluation", mock.Anything, mock.Anything)
}
// TestRegisterJobSucceedsWhenMonitoringEvaluationSucceeds checks that registerJob
// reports no error when both the registration and the monitoring succeed.
func TestRegisterJobSucceedsWhenMonitoringEvaluationSucceeds(t *testing.T) {
	const evaluationID = "id"

	executorMock := &nomad.ExecutorApiMock{}
	executorMock.
		On("RegisterNomadJob", mock.AnythingOfType("*api.Job")).
		Return(evaluationID, nil)
	executorMock.
		On("MonitorEvaluation", evaluationID, mock.AnythingOfType("*context.emptyCtx")).
		Return(nil)

	manager := NomadEnvironmentManager{
		runnerManager: nil,
		api:           executorMock,
		defaultJob:    nomadApi.Job{},
	}

	assert.NoError(t, manager.registerJob("id", 1, 2, 3, "image", false, []uint16{}))
}
// TestRegisterJobReturnsErrorWhenMonitoringEvaluationFails checks that registerJob
// passes on the error returned by the evaluation monitoring.
func TestRegisterJobReturnsErrorWhenMonitoringEvaluationFails(t *testing.T) {
	const evaluationID = "id"
	monitoringErr := errors.New("test error")

	executorMock := &nomad.ExecutorApiMock{}
	executorMock.
		On("RegisterNomadJob", mock.AnythingOfType("*api.Job")).
		Return(evaluationID, nil)
	executorMock.
		On("MonitorEvaluation", evaluationID, mock.AnythingOfType("*context.emptyCtx")).
		Return(monitoringErr)

	manager := NomadEnvironmentManager{
		runnerManager: nil,
		api:           executorMock,
		defaultJob:    nomadApi.Job{},
	}

	assert.Equal(t, monitoringErr, manager.registerJob("id", 1, 2, 3, "image", false, []uint16{}))
}

View File

@ -1,6 +1,7 @@
package nomad
import (
"context"
nomadApi "github.com/hashicorp/nomad/api"
"net/url"
)
@ -24,11 +25,20 @@ type apiQuerier interface {
// loadRunners loads all allocations of the specified job.
loadRunners(jobId string) (allocationListStub []*nomadApi.AllocationListStub, err error)
// RegisterNomadJob registers a job with Nomad.
// It returns the evaluation ID that can be used when listening to the Nomad event stream.
RegisterNomadJob(job *nomadApi.Job) (string, error)
// EvaluationStream returns a Nomad event stream filtered to return only events belonging to the
// given evaluation ID.
EvaluationStream(evalID string, ctx context.Context) (<-chan *nomadApi.Events, error)
}
// nomadApiClient implements the apiQuerier interface and provides access to a real Nomad API.
type nomadApiClient struct {
client *nomadApi.Client
client *nomadApi.Client
namespace string
}
func (nc *nomadApiClient) init(nomadURL *url.URL, nomadNamespace string) (err error) {
@ -37,6 +47,7 @@ func (nc *nomadApiClient) init(nomadURL *url.URL, nomadNamespace string) (err er
TLSConfig: &nomadApi.TLSConfig{},
Namespace: nomadNamespace,
})
nc.namespace = nomadNamespace
return err
}
@ -53,3 +64,29 @@ func (nc *nomadApiClient) loadRunners(jobId string) (allocationListStub []*nomad
allocationListStub, _, err = nc.client.Jobs().Allocations(jobId, true, nil)
return
}
// RegisterNomadJob sets the client's namespace on the given job and registers the job with Nomad.
// Warnings contained in the registration response are logged together with the job.
// It returns the evaluation ID of the registration response, or the registration error.
func (nc *nomadApiClient) RegisterNomadJob(job *nomadApi.Job) (string, error) {
	job.Namespace = &nc.namespace

	response, _, err := nc.client.Jobs().Register(job, nil)
	if err != nil {
		return "", err
	}

	if warnings := response.Warnings; warnings != "" {
		entry := log.WithField("job", job).WithField("warnings", warnings)
		entry.Warn("Received warnings when registering job")
	}
	return response.EvalID, nil
}
// EvaluationStream opens a Nomad event stream that is subscribed to the evaluation
// topic and filtered by the given evaluation ID. The stream is requested with the
// given context, index 0 and no query options.
func (nc *nomadApiClient) EvaluationStream(evalID string, ctx context.Context) (stream <-chan *nomadApi.Events, err error) {
	topics := map[nomadApi.Topic][]string{
		nomadApi.TopicEvaluation: {evalID},
	}
	return nc.client.EventStream().Stream(ctx, topics, 0, nil)
}

View File

@ -3,7 +3,10 @@
package nomad
import (
context "context"
api "github.com/hashicorp/nomad/api"
mock "github.com/stretchr/testify/mock"
url "net/url"
@ -28,6 +31,29 @@ func (_m *apiQuerierMock) DeleteRunner(runnerId string) error {
return r0
}
// EvaluationStream provides a mock function with given fields: evalID, ctx
// Each configured return value may either be a plain value or a function taking
// the method's parameters, in which case the function is called to produce the value.
func (_m *apiQuerierMock) EvaluationStream(evalID string, ctx context.Context) (<-chan *api.Events, error) {
ret := _m.Called(evalID, ctx)
var r0 <-chan *api.Events
if rf, ok := ret.Get(0).(func(string, context.Context) <-chan *api.Events); ok {
r0 = rf(evalID, ctx)
} else {
// A nil first return value leaves r0 as a nil channel instead of being type-asserted.
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan *api.Events)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context) error); ok {
r1 = rf(evalID, ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// JobScale provides a mock function with given fields: jobId
func (_m *apiQuerierMock) JobScale(jobId string) (int, error) {
ret := _m.Called(jobId)
@ -72,6 +98,27 @@ func (_m *apiQuerierMock) LoadJobList() ([]*api.JobListStub, error) {
return r0, r1
}
// RegisterNomadJob provides a mock function with given fields: job
// Each configured return value may either be a plain value or a function taking
// the job, in which case the function is called to produce the value.
func (_m *apiQuerierMock) RegisterNomadJob(job *api.Job) (string, error) {
ret := _m.Called(job)
var r0 string
if rf, ok := ret.Get(0).(func(*api.Job) string); ok {
r0 = rf(job)
} else {
// The first return value must be a string here; there is no nil check before the type assertion.
r0 = ret.Get(0).(string)
}
var r1 error
if rf, ok := ret.Get(1).(func(*api.Job) error); ok {
r1 = rf(job)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// SetJobScale provides a mock function with given fields: jobId, count, reason
func (_m *apiQuerierMock) SetJobScale(jobId string, count int, reason string) error {
ret := _m.Called(jobId, count, reason)

View File

@ -1,9 +1,12 @@
// Code generated by mockery v2.7.5. DO NOT EDIT.
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package nomad
import (
context "context"
api "github.com/hashicorp/nomad/api"
mock "github.com/stretchr/testify/mock"
url "net/url"
@ -28,6 +31,29 @@ func (_m *ExecutorApiMock) DeleteRunner(runnerId string) error {
return r0
}
// EvaluationStream provides a mock function with given fields: evalID, ctx
// Each configured return value may either be a plain value or a function taking
// the method's parameters, in which case the function is called to produce the value.
func (_m *ExecutorApiMock) EvaluationStream(evalID string, ctx context.Context) (<-chan *api.Events, error) {
ret := _m.Called(evalID, ctx)
var r0 <-chan *api.Events
if rf, ok := ret.Get(0).(func(string, context.Context) <-chan *api.Events); ok {
r0 = rf(evalID, ctx)
} else {
// A nil first return value leaves r0 as a nil channel instead of being type-asserted.
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan *api.Events)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context) error); ok {
r1 = rf(evalID, ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// JobScale provides a mock function with given fields: jobId
func (_m *ExecutorApiMock) JobScale(jobId string) (int, error) {
ret := _m.Called(jobId)
@ -95,6 +121,41 @@ func (_m *ExecutorApiMock) LoadRunners(jobId string) ([]string, error) {
return r0, r1
}
// MonitorEvaluation provides a mock function with given fields: evalID, ctx
// The configured return value may either be an error value or a function taking
// the method's parameters, in which case the function is called to produce the error.
func (_m *ExecutorApiMock) MonitorEvaluation(evalID string, ctx context.Context) error {
ret := _m.Called(evalID, ctx)
var r0 error
if rf, ok := ret.Get(0).(func(string, context.Context) error); ok {
r0 = rf(evalID, ctx)
} else {
r0 = ret.Error(0)
}
return r0
}
// RegisterNomadJob provides a mock function with given fields: job
// Each configured return value may either be a plain value or a function taking
// the job, in which case the function is called to produce the value.
func (_m *ExecutorApiMock) RegisterNomadJob(job *api.Job) (string, error) {
ret := _m.Called(job)
var r0 string
if rf, ok := ret.Get(0).(func(*api.Job) string); ok {
r0 = rf(job)
} else {
// The first return value must be a string here; there is no nil check before the type assertion.
r0 = ret.Get(0).(string)
}
var r1 error
if rf, ok := ret.Get(1).(func(*api.Job) error); ok {
r1 = rf(job)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// SetJobScale provides a mock function with given fields: jobId, count, reason
func (_m *ExecutorApiMock) SetJobScale(jobId string, count int, reason string) error {
ret := _m.Called(jobId, count, reason)

View File

@ -1,16 +1,30 @@
package nomad
import (
	"context"
	"errors"
	"fmt"
	"net/url"
	"sort"
	"strings"

	nomadApi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/nomad/structs"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/logging"
)
var log = logging.GetLogger("nomad")
// ExecutorApi provides access to a container orchestration solution.
type ExecutorApi interface {
// apiQuerier is embedded, so its methods are part of this interface as well.
apiQuerier
// LoadRunners loads all allocations of the specified job which are running and not about to get stopped.
LoadRunners(jobId string) (runnerIds []string, err error)
// MonitorEvaluation monitors the given evaluation ID.
// It waits until the evaluation reaches one of the states complete, cancelled or failed.
// If the evaluation was not successful, an error containing the failures is returned.
// See also https://github.com/hashicorp/nomad/blob/7d5a9ecde95c18da94c9b6ace2565afbfdd6a40d/command/monitor.go#L175
MonitorEvaluation(evalID string, ctx context.Context) error
}
// ApiClient implements the ExecutorApi interface and can be used to perform different operations on the real Executor API and its return values.
@ -27,8 +41,8 @@ func NewExecutorApi(nomadURL *url.URL, nomadNamespace string) (ExecutorApi, erro
}
// init prepares an apiClient to be able to communicate to a provided Nomad API.
func (apiClient *ApiClient) init(nomadURL *url.URL, nomadNamespace string) (err error) {
err = apiClient.apiQuerier.init(nomadURL, nomadNamespace)
func (a *ApiClient) init(nomadURL *url.URL, nomadNamespace string) (err error) {
err = a.apiQuerier.init(nomadURL, nomadNamespace)
if err != nil {
return err
}
@ -36,9 +50,9 @@ func (apiClient *ApiClient) init(nomadURL *url.URL, nomadNamespace string) (err
}
// LoadRunners loads the allocations of the specified job.
func (apiClient *ApiClient) LoadRunners(jobId string) (runnerIds []string, err error) {
func (a *ApiClient) LoadRunners(jobId string) (runnerIds []string, err error) {
//list, _, err := apiClient.client.Jobs().Allocations(jobId, true, nil)
list, err := apiClient.loadRunners(jobId)
list, err := a.loadRunners(jobId)
if err != nil {
return nil, err
}
@ -50,3 +64,59 @@ func (apiClient *ApiClient) LoadRunners(jobId string) (runnerIds []string, err e
}
return
}
// MonitorEvaluation listens to the event stream of the evaluation with the given ID
// until the evaluation reaches one of the states complete, cancelled or failed.
//
// It returns
//   - the error of EvaluationStream if the stream cannot be opened,
//   - nil if ctx is done before the evaluation finished,
//   - the error carried by a streamed event, or the error of decoding a streamed event,
//   - the result of checkEvaluation once the evaluation reached a final state,
//   - an error if the stream is closed although ctx is not done.
func (a *ApiClient) MonitorEvaluation(evalID string, ctx context.Context) error {
	stream, err := a.EvaluationStream(evalID, ctx)
	if err != nil {
		return err
	}
	for {
		var events *nomadApi.Events
		select {
		case <-ctx.Done():
			return nil
		case received, ok := <-stream:
			if !ok {
				// A closed stream only yields nil events. Without this check, the nil
				// pointer would be dereferenced below.
				if ctx.Err() != nil {
					// The stream was closed because of the cancellation; treat it like ctx.Done().
					return nil
				}
				return fmt.Errorf("event stream closed before evaluation %q finished", evalID)
			}
			events = received
		}
		if events == nil || events.IsHeartbeat() {
			continue
		}
		if err := events.Err; err != nil {
			log.WithError(err).Warn("Error monitoring evaluation")
			return err
		}
		for _, event := range events.Events {
			eval, err := event.Evaluation()
			if err != nil {
				log.WithError(err).Warn("Error retrieving evaluation from streamed event")
				return err
			}
			if eval == nil {
				// Guard against an event whose payload carries no evaluation.
				continue
			}
			switch eval.Status {
			case structs.EvalStatusComplete, structs.EvalStatusCancelled, structs.EvalStatusFailed:
				return checkEvaluation(eval)
			}
		}
	}
}
// checkEvaluation checks whether the given evaluation failed.
//
// Without failed task group allocations, it returns nil for the status complete and
// an error containing the status otherwise.
// With failed task group allocations, it returns an error whose message lists the
// evaluation ID and status, one line per failed task group (sorted by task group name,
// so that the message is deterministic) and, if set, the blocked evaluation.
func checkEvaluation(eval *nomadApi.Evaluation) error {
	if len(eval.FailedTGAllocs) == 0 {
		if eval.Status == structs.EvalStatusComplete {
			return nil
		}
		return fmt.Errorf("evaluation could not complete: %q", eval.Status)
	}

	// Map iteration order is random, so collect and sort the task group names first.
	taskGroups := make([]string, 0, len(eval.FailedTGAllocs))
	for tg := range eval.FailedTGAllocs {
		taskGroups = append(taskGroups, tg)
	}
	sort.Strings(taskGroups)

	// One header line, one line per task group and an optional line for the blocked evaluation.
	messages := make([]string, 0, len(taskGroups)+2)
	messages = append(messages,
		fmt.Sprintf("Evaluation %q finished with status %q but failed to place all allocations.", eval.ID, eval.Status))
	for _, tg := range taskGroups {
		messages = append(messages, fmt.Sprintf("%s: %#v", tg, eval.FailedTGAllocs[tg]))
	}
	if eval.BlockedEval != "" {
		messages = append(messages, fmt.Sprintf("Evaluation %q waiting for additional capacity to place remainder", eval.BlockedEval))
	}
	return errors.New(strings.Join(messages, "\n"))
}

View File

@ -1,14 +1,19 @@
package nomad
import (
"context"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"net/url"
"testing"
"time"
)
func TestLoadRunnersTestSuite(t *testing.T) {
@ -148,3 +153,239 @@ func TestNewExecutorApiCanBeCreatedWithoutError(t *testing.T) {
_, err = NewExecutorApi(&TestURL, TestNamespace)
require.Nil(t, err)
}
// TestApiClient_MonitorEvaluationReturnsNilWhenContextCancelled checks that cancelling
// the context ends MonitorEvaluation with a nil error when the stream delivers nothing.
func TestApiClient_MonitorEvaluationReturnsNilWhenContextCancelled(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())

	// Nothing is ever sent on this stream, so only the cancellation can end the monitoring.
	silentStream := make(<-chan *nomadApi.Events)
	querier := &apiQuerierMock{}
	querier.On("EvaluationStream", mock.AnythingOfType("string"), ctx).Return(silentStream, nil)
	client := &ApiClient{querier}

	result := make(chan error)
	go func() {
		result <- client.MonitorEvaluation("id", ctx)
	}()
	cancel()

	// If cancel doesn't terminate MonitorEvaluation, the timeout case fails the test.
	select {
	case err := <-result:
		assert.Nil(t, err)
	case <-time.After(time.Millisecond * 10):
		t.Fatal("MonitorEvaluation didn't finish as expected")
	}
}
// TestApiClient_MonitorEvaluationReturnsErrorWhenStreamReturnsError checks that an error
// from opening the evaluation stream is returned by MonitorEvaluation.
func TestApiClient_MonitorEvaluationReturnsErrorWhenStreamReturnsError(t *testing.T) {
	streamErr := errors.New("test error")

	querier := &apiQuerierMock{}
	querier.
		On("EvaluationStream", mock.AnythingOfType("string"), mock.AnythingOfType("*context.emptyCtx")).
		Return(nil, streamErr)

	client := &ApiClient{querier}
	assert.Equal(t, streamErr, client.MonitorEvaluation("id", context.Background()))
}
// eventPayload is the structure that eventForEvaluation turns into the payload map
// of an event. It holds the evaluation in its Evaluation field.
type eventPayload struct {
Evaluation *nomadApi.Evaluation
}
// eventForEvaluation takes an evaluation and creates an Event with the given evaluation
// as its payload. Nomad uses the mapstructure library to decode the payload, which we
// simply reverse here.
// The test is failed immediately if the evaluation cannot be turned into a payload.
func eventForEvaluation(t *testing.T, eval nomadApi.Evaluation) nomadApi.Event {
	// Report failures at the caller's line instead of inside this helper.
	t.Helper()

	payload := make(map[string]interface{})
	if err := mapstructure.Decode(eventPayload{&eval}, &payload); err != nil {
		// Fatalf stops the test, so no return value is needed on this path.
		t.Fatalf("Couldn't encode evaluation %v: %v", eval, err)
	}
	return nomadApi.Event{Topic: nomadApi.TopicEvaluation, Payload: payload}
}
// runEvaluationMonitoring runs MonitorEvaluation against a mocked evaluation stream
// and feeds it the given events one after another.
// If MonitorEvaluation returns while events are still being streamed, the index of the
// event that could not be delivered any more is returned together with the error.
// Otherwise it waits for MonitorEvaluation to return (failing the test after a short
// timeout) and returns the index of the last event incremented by one.
func runEvaluationMonitoring(t *testing.T, events []*nomadApi.Events) (eventsProcessed int, err error) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	stream := make(chan *nomadApi.Events)
	// The mock has to return the receive-only channel type that EvaluationStream declares.
	var receiveOnly <-chan *nomadApi.Events = stream

	querier := &apiQuerierMock{}
	querier.On("EvaluationStream", mock.AnythingOfType("string"), ctx).Return(receiveOnly, nil)
	client := &ApiClient{querier}

	result := make(chan error)
	go func() {
		result <- client.MonitorEvaluation("id", ctx)
	}()

	for index, next := range events {
		eventsProcessed = index
		select {
		case err = <-result:
			return eventsProcessed, err
		case stream <- next:
		}
	}

	// wait for error after streaming final event
	select {
	case err = <-result:
	case <-time.After(time.Millisecond * 10):
		t.Fatal("MonitorEvaluation didn't finish as expected")
	}
	// Increment once as range starts at 0
	eventsProcessed++
	return eventsProcessed, err
}
// TestApiClient_MonitorEvaluationWithSuccessfulEvent checks that MonitorEvaluation
// returns nil at the first completed evaluation and skips heartbeats and pending evaluations.
func TestApiClient_MonitorEvaluationWithSuccessfulEvent(t *testing.T) {
	completeEval := nomadApi.Evaluation{Status: structs.EvalStatusComplete}
	pendingEval := nomadApi.Evaluation{Status: structs.EvalStatusPending}

	// make sure that the tested function can complete
	require.Nil(t, checkEvaluation(&completeEval))

	complete := &nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, completeEval)}}
	pending := &nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, pendingEval)}}
	pendingThenComplete := &nomadApi.Events{Events: []nomadApi.Event{
		eventForEvaluation(t, pendingEval), eventForEvaluation(t, completeEval),
	}}
	// Events without index and without content are what the monitoring skips as heartbeat.
	heartbeat := &nomadApi.Events{}

	type testCase struct {
		name                    string
		streamedEvents          []*nomadApi.Events
		expectedEventsProcessed int
	}
	cases := []testCase{
		{
			name:                    "it completes with successful event",
			streamedEvents:          []*nomadApi.Events{complete},
			expectedEventsProcessed: 1,
		},
		{
			name:                    "it completes at first successful event",
			streamedEvents:          []*nomadApi.Events{complete, complete},
			expectedEventsProcessed: 1,
		},
		{
			name:                    "it skips heartbeat and completes",
			streamedEvents:          []*nomadApi.Events{heartbeat, complete},
			expectedEventsProcessed: 2,
		},
		{
			name:                    "it skips pending evaluation and completes",
			streamedEvents:          []*nomadApi.Events{pending, complete},
			expectedEventsProcessed: 2,
		},
		{
			name:                    "it handles multiple events per received event",
			streamedEvents:          []*nomadApi.Events{pendingThenComplete},
			expectedEventsProcessed: 1,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			processed, err := runEvaluationMonitoring(t, c.streamedEvents)
			assert.Nil(t, err)
			assert.Equal(t, c.expectedEventsProcessed, processed)
		})
	}
}
// TestApiClient_MonitorEvaluationWithFailingEvent checks that MonitorEvaluation returns
// the error of the first failed evaluation, or the error carried by the streamed events,
// and skips heartbeats and pending evaluations.
func TestApiClient_MonitorEvaluationWithFailingEvent(t *testing.T) {
	failedEval := nomadApi.Evaluation{Status: structs.EvalStatusFailed}
	evalErr := checkEvaluation(&failedEval)
	require.NotNil(t, evalErr)

	pendingEval := nomadApi.Evaluation{Status: structs.EvalStatusPending}
	eventsErr := errors.New("my events error")

	failed := &nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, failedEval)}}
	pending := &nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, pendingEval)}}
	pendingThenFailed := &nomadApi.Events{Events: []nomadApi.Event{
		eventForEvaluation(t, pendingEval), eventForEvaluation(t, failedEval),
	}}
	erroneous := &nomadApi.Events{Err: eventsErr, Events: []nomadApi.Event{{}}}
	// Events without index and without content are what the monitoring skips as heartbeat.
	heartbeat := &nomadApi.Events{}

	type testCase struct {
		name                    string
		streamedEvents          []*nomadApi.Events
		expectedEventsProcessed int
		expectedError           error
	}
	cases := []testCase{
		{
			name:                    "it fails with failing event",
			streamedEvents:          []*nomadApi.Events{failed},
			expectedEventsProcessed: 1,
			expectedError:           evalErr,
		},
		{
			name:                    "it fails at first failing event",
			streamedEvents:          []*nomadApi.Events{failed, failed},
			expectedEventsProcessed: 1,
			expectedError:           evalErr,
		},
		{
			name:                    "it skips heartbeat and fail",
			streamedEvents:          []*nomadApi.Events{heartbeat, failed},
			expectedEventsProcessed: 2,
			expectedError:           evalErr,
		},
		{
			name:                    "it skips pending evaluation and fail",
			streamedEvents:          []*nomadApi.Events{pending, failed},
			expectedEventsProcessed: 2,
			expectedError:           evalErr,
		},
		{
			name:                    "it handles multiple events per received event and fails",
			streamedEvents:          []*nomadApi.Events{pendingThenFailed},
			expectedEventsProcessed: 1,
			expectedError:           evalErr,
		},
		{
			name:                    "it fails with event error when event has error",
			streamedEvents:          []*nomadApi.Events{erroneous},
			expectedEventsProcessed: 1,
			expectedError:           eventsErr,
		},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			processed, err := runEvaluationMonitoring(t, c.streamedEvents)
			assert.Equal(t, c.expectedError, err)
			assert.Equal(t, c.expectedEventsProcessed, processed)
		})
	}
}
// TestApiClient_MonitorEvaluationFailsWhenFailingToDecodeEvaluation checks that
// MonitorEvaluation returns the decoding error when a streamed event carries an
// evaluation that cannot be decoded.
func TestApiClient_MonitorEvaluationFailsWhenFailingToDecodeEvaluation(t *testing.T) {
	event := nomadApi.Event{
		Topic: nomadApi.TopicEvaluation,
		// This should fail decoding, as Evaluation.Status is expected to be a string, not int
		Payload: map[string]interface{}{"Evaluation": map[string]interface{}{"Status": 1}},
	}
	// Keep the decoding error under its own name. Reusing err for the monitoring result
	// would overwrite it, and the comparison below would compare the error with itself.
	_, expectedErr := event.Evaluation()
	require.NotNil(t, expectedErr)

	eventsProcessed, err := runEvaluationMonitoring(t, []*nomadApi.Events{{Events: []nomadApi.Event{event}}})
	assert.Equal(t, expectedErr, err)
	assert.Equal(t, 1, eventsProcessed)
}
// TestCheckEvaluationWithFailedAllocations checks that the error of checkEvaluation
// mentions the evaluation status and the failed allocation metric, and that setting
// a blocked evaluation changes the message.
func TestCheckEvaluationWithFailedAllocations(t *testing.T) {
	const taskGroup = "test1"
	metric := &nomadApi.AllocationMetric{NodesExhausted: 1}
	evaluation := nomadApi.Evaluation{
		Status:         structs.EvalStatusFailed,
		FailedTGAllocs: map[string]*nomadApi.AllocationMetric{taskGroup: metric},
	}

	expectedMetricLine := fmt.Sprintf("%s: %#v", taskGroup, metric)
	assertMessage := func(message string) {
		assert.Contains(t, message, evaluation.Status, "error should contain the evaluation status")
		assert.Contains(t, message, expectedMetricLine,
			"error should contain the failed allocations metric")
	}

	var withoutBlockedEval, withBlockedEval string
	t.Run("without blocked eval", func(t *testing.T) {
		err := checkEvaluation(&evaluation)
		require.NotNil(t, err)
		withoutBlockedEval = err.Error()
		assertMessage(withoutBlockedEval)
	})

	t.Run("with blocked eval", func(t *testing.T) {
		evaluation.BlockedEval = "blocking-eval"
		err := checkEvaluation(&evaluation)
		require.NotNil(t, err)
		withBlockedEval = err.Error()
		assertMessage(withBlockedEval)
	})

	assert.NotEqual(t, withBlockedEval, withoutBlockedEval)
}
// TestCheckEvaluationWithoutFailedAllocations checks that, without failed allocations,
// checkEvaluation returns nil for the status complete and an error containing the
// status for every other tested status.
func TestCheckEvaluationWithoutFailedAllocations(t *testing.T) {
	evaluation := nomadApi.Evaluation{FailedTGAllocs: map[string]*nomadApi.AllocationMetric{}}

	t.Run("when evaluation status complete", func(t *testing.T) {
		evaluation.Status = structs.EvalStatusComplete
		assert.Nil(t, checkEvaluation(&evaluation))
	})

	t.Run("when evaluation status not complete", func(t *testing.T) {
		otherStatuses := []string{
			structs.EvalStatusFailed,
			structs.EvalStatusCancelled,
			structs.EvalStatusBlocked,
			structs.EvalStatusPending,
		}
		for _, status := range otherStatuses {
			evaluation.Status = status
			err := checkEvaluation(&evaluation)
			require.NotNil(t, err)
			assert.Contains(t, err.Error(), status, "error should contain the evaluation status")
		}
	})
}