From 9300a82535dae296131e8f875a76103ce9dc21c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Pa=C3=9F?= <22845248+mpass99@users.noreply.github.com> Date: Thu, 11 May 2023 13:21:14 +0100 Subject: [PATCH] Fix missing idle runners. In the context of #358 we identified that the event with the type `AllocationUpdated` and the client status `pending` is common but not always send by Nomad. With this Commit we remove the condition that limits the evaluated Nomad events to the event with the type `AllocationUpdated`. Without the condition the event of the type `PlanResult` and the status `pending` will be evaluated equally. By now, this event seems to be sent every time. This restriction led to started allocation not being registered when the `AllocationUpdated` event with client status `pending` was missing. --- internal/nomad/nomad.go | 9 +++------ internal/nomad/nomad_test.go | 31 +++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 14 deletions(-) diff --git a/internal/nomad/nomad.go b/internal/nomad/nomad.go index a4b861a..de6f568 100644 --- a/internal/nomad/nomad.go +++ b/internal/nomad/nomad.go @@ -318,9 +318,6 @@ func handleEvaluationEvent(evaluations map[string]chan error, event *nomadApi.Ev // storage the state is persisted between multiple calls of this function. func handleAllocationEvent(startTime int64, allocations storage.Storage[*allocationData], event *nomadApi.Event, callbacks *AllocationProcessoring) error { - if event.Type != structs.TypeAllocationUpdated { - return nil - } alloc, err := event.Allocation() if err != nil { return fmt.Errorf("failed to retrieve allocation from event: %w", err) @@ -359,12 +356,13 @@ func handleAllocationEvent(startTime int64, allocations storage.Storage[*allocat // This allows the handling of startups and re-placements of allocations. func handlePendingAllocationEvent(alloc *nomadApi.Allocation, allocations storage.Storage[*allocationData], callbacks *AllocationProcessoring) { - log.WithField("alloc_id", alloc.ID).Debug("Handle Pending Allocation Event") if alloc.DesiredStatus == structs.AllocDesiredStatusRun { allocData, ok := allocations.Get(alloc.ID) if ok && allocData.allocClientStatus != structs.AllocClientStatusRunning { // Pending Allocation is already stored. - log.WithField("alloc_id", alloc.ID).Debug("Pending Allocation already stored") + // This happens because depending on the startup duration of the runner, we get zero, one, or more events + // notifying us that the allocation is still pending. + // We are just interested in the first event, in order to have the correct start time. return } else if ok { // Handle Runner (/Container) re-allocations. @@ -385,7 +383,6 @@ func handlePendingAllocationEvent(alloc *nomadApi.Allocation, // handleRunningAllocationEvent calls the passed AllocationProcessor filtering similar events. func handleRunningAllocationEvent(alloc *nomadApi.Allocation, allocations storage.Storage[*allocationData], callbacks *AllocationProcessoring) { - log.WithField("alloc_id", alloc.ID).Debug("Handle Running Allocation Event") if alloc.DesiredStatus == structs.AllocDesiredStatusRun { // is first event that marks the transition between pending and running? if allocData, ok := allocations.Get(alloc.ID); ok && allocData.allocClientStatus == structs.AllocClientStatusPending { diff --git a/internal/nomad/nomad_test.go b/internal/nomad/nomad_test.go index a7b9c57..90ad725 100644 --- a/internal/nomad/nomad_test.go +++ b/internal/nomad/nomad_test.go @@ -509,16 +509,31 @@ func TestApiClient_WatchAllocationsHandlesEvents(t *testing.T) { } func TestHandleAllocationEventBuffersPendingAllocation(t *testing.T) { - newPendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun) - newPendingEvent := eventForAllocation(t, newPendingAllocation) + t.Run("AllocationUpdated", func(t *testing.T) { + newPendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun) + newPendingEvent := eventForAllocation(t, newPendingAllocation) - allocations := storage.NewLocalStorage[*allocationData]() - err := handleAllocationEvent( - time.Now().UnixNano(), allocations, &newPendingEvent, noopAllocationProcessoring) - require.NoError(t, err) + allocations := storage.NewLocalStorage[*allocationData]() + err := handleAllocationEvent( + time.Now().UnixNano(), allocations, &newPendingEvent, noopAllocationProcessoring) + require.NoError(t, err) - _, ok := allocations.Get(newPendingAllocation.ID) - assert.True(t, ok) + _, ok := allocations.Get(newPendingAllocation.ID) + assert.True(t, ok) + }) + t.Run("PlanResult", func(t *testing.T) { + newPendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun) + newPendingEvent := eventForAllocation(t, newPendingAllocation) + newPendingEvent.Type = structs.TypePlanResult + + allocations := storage.NewLocalStorage[*allocationData]() + err := handleAllocationEvent( + time.Now().UnixNano(), allocations, &newPendingEvent, noopAllocationProcessoring) + require.NoError(t, err) + + _, ok := allocations.Get(newPendingAllocation.ID) + assert.True(t, ok) + }) } func TestAPIClient_WatchAllocationsReturnsErrorWhenAllocationStreamCannotBeRetrieved(t *testing.T) {