Add updating cached allocations
This commit is contained in:

committed by
Maximilian Pass

parent
66821dbfc8
commit
3f572261c2
@@ -9,6 +9,7 @@ import (
|
||||
"gitlab.hpi.de/codeocean/codemoon/poseidon/logging"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -16,6 +17,8 @@ var (
|
||||
ErrorExecutorCommunicationFailed = errors.New("communication with executor failed")
|
||||
)
|
||||
|
||||
// allocationProcessor is a callback that receives a single Nomad allocation.
// WatchAllocations accepts two of them: one for new and one for deleted allocations.
type allocationProcessor func(*nomadApi.Allocation)
|
||||
|
||||
// ExecutorApi provides access to a container orchestration solution.
|
||||
type ExecutorApi interface {
|
||||
apiQuerier
|
||||
@@ -28,6 +31,10 @@ type ExecutorApi interface {
|
||||
// If the evaluation was not successful, an error containing the failures is returned.
|
||||
// See also https://github.com/hashicorp/nomad/blob/7d5a9ecde95c18da94c9b6ace2565afbfdd6a40d/command/monitor.go#L175
|
||||
MonitorEvaluation(evalID string, ctx context.Context) error
|
||||
|
||||
// WatchAllocations listens on the Nomad event stream for allocation events.
|
||||
// Depending on the incoming event, one of the given functions is executed.
|
||||
WatchAllocations(ctx context.Context, onNewAllocation, onDeletedAllocation allocationProcessor) error
|
||||
}
|
||||
|
||||
// ApiClient implements the ExecutorApi interface and can be used to perform different operations on the real Executor API and its return values.
|
||||
@@ -74,26 +81,100 @@ func (a *ApiClient) MonitorEvaluation(evalID string, ctx context.Context) error
|
||||
return err
|
||||
}
|
||||
// If ctx is cancelled, the stream will be closed by Nomad and we exit the for loop.
|
||||
return receiveAndHandleNomadAPIEvents(stream, handleEvaluationEvent)
|
||||
}
|
||||
|
||||
func (a *ApiClient) WatchAllocations(ctx context.Context, onNewAllocation, onDeletedAllocation allocationProcessor) error {
|
||||
startTime := time.Now().UnixNano()
|
||||
stream, err := a.AllocationStream(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed retrieving allocation stream: %w", err)
|
||||
}
|
||||
waitingToRun := make(map[string]bool)
|
||||
|
||||
handler := func(event nomadApi.Event) error {
|
||||
return handleAllocationEvent(startTime, waitingToRun, event, onNewAllocation, onDeletedAllocation)
|
||||
}
|
||||
|
||||
err = receiveAndHandleNomadAPIEvents(stream, handler)
|
||||
return err
|
||||
}
|
||||
|
||||
// nomadAPIEventHandler processes a single event received from the Nomad event stream.
// A non-nil error returned by the handler makes receiveAndHandleNomadAPIEvents stop and return it.
type nomadAPIEventHandler func(event nomadApi.Event) error
|
||||
|
||||
// receiveAndHandleNomadAPIEvents receives events from the Nomad event stream and calls the handler function for each received
|
||||
// event. It skips heartbeat events and returns an error if the received events contain an error.
|
||||
func receiveAndHandleNomadAPIEvents(stream <-chan *nomadApi.Events, handler nomadAPIEventHandler) error {
|
||||
// If original context is cancelled, the stream will be closed by Nomad and we exit the for loop.
|
||||
for events := range stream {
|
||||
if events.IsHeartbeat() {
|
||||
continue
|
||||
}
|
||||
if err := events.Err; err != nil {
|
||||
log.WithError(err).Warn("Error monitoring evaluation")
|
||||
return err
|
||||
return fmt.Errorf("error receiving events: %w", err)
|
||||
}
|
||||
for _, event := range events.Events {
|
||||
eval, err := event.Evaluation()
|
||||
if err != nil {
|
||||
log.WithError(err).Warn("Error retrieving evaluation from streamed event")
|
||||
// TODO: we can't break out of this function from inside the handler
|
||||
if err := handler(event); err != nil {
|
||||
return err
|
||||
}
|
||||
switch eval.Status {
|
||||
case structs.EvalStatusComplete, structs.EvalStatusCancelled, structs.EvalStatusFailed:
|
||||
return checkEvaluation(eval)
|
||||
default:
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleEvaluationEvent is a nomadAPIEventHandler that returns the status of an evaluation in the event.
|
||||
func handleEvaluationEvent(event nomadApi.Event) error {
|
||||
eval, err := event.Evaluation()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed monitoring evaluation: %w", err)
|
||||
}
|
||||
switch eval.Status {
|
||||
case structs.EvalStatusComplete, structs.EvalStatusCancelled, structs.EvalStatusFailed:
|
||||
return checkEvaluation(eval)
|
||||
default:
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleAllocationEvent is a nomadAPIEventHandler that processes allocation events.
|
||||
// If a new allocation is received, onNewAllocation is called. If an allocation is deleted, onDeletedAllocation
|
||||
// is called. The waitingToRun map is used to store allocations that are pending but not started yet. Using the map
|
||||
// the state is persisted between multiple calls of this function.
|
||||
func handleAllocationEvent(startTime int64, waitingToRun map[string]bool, event nomadApi.Event,
|
||||
onNewAllocation, onDeletedAllocation allocationProcessor) error {
|
||||
alloc, err := event.Allocation()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed retrieving allocation from event %v: %w", event, err)
|
||||
}
|
||||
if alloc == nil || event.Type == structs.TypePlanResult {
|
||||
return nil
|
||||
}
|
||||
|
||||
if event.Type == structs.TypeAllocationUpdated {
|
||||
// When starting the API and listening on the Nomad event stream we might get events that already
|
||||
// happened from Nomad as it seems to buffer them for a certain duration.
|
||||
// Ignore old events here.
|
||||
if alloc.ModifyTime < startTime {
|
||||
return nil
|
||||
}
|
||||
|
||||
if alloc.ClientStatus == structs.AllocClientStatusRunning {
|
||||
switch alloc.DesiredStatus {
|
||||
case structs.AllocDesiredStatusStop:
|
||||
onDeletedAllocation(alloc)
|
||||
case structs.AllocDesiredStatusRun:
|
||||
// first event that marks the transition between pending and running
|
||||
_, ok := pendingAllocations[alloc.ID]
|
||||
if ok {
|
||||
onNewAllocation(alloc)
|
||||
delete(pendingAllocations, alloc.ID)
|
||||
}
|
||||
}
|
||||
if alloc.ClientStatus == structs.AllocClientStatusPending && alloc.DesiredStatus == structs.AllocDesiredStatusRun {
|
||||
// allocation is started, wait until it runs and add to our list afterwards
|
||||
waitingToRun[alloc.ID] = true
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
Reference in New Issue
Block a user