Add a warning when allocations fail (#83)

* Log a warning when an allocation fails

* Restructure allocation event handling
This commit is contained in:
Maximilian Paß
2021-12-23 13:10:55 +01:00
committed by GitHub
parent 79578bd483
commit 1239699e74

View File

@ -266,25 +266,61 @@ func handleAllocationEvent(startTime int64, pendingAllocations map[string]bool,
return nil
}
if alloc.ClientStatus == structs.AllocClientStatusRunning {
switch alloc.DesiredStatus {
case structs.AllocDesiredStatusStop:
onDeletedAllocation(alloc)
case structs.AllocDesiredStatusRun:
// is first event that marks the transition between pending and running?
_, ok := pendingAllocations[alloc.ID]
if ok {
onNewAllocation(alloc)
delete(pendingAllocations, alloc.ID)
}
}
switch alloc.ClientStatus {
case structs.AllocClientStatusPending:
handlePendingAllocationEvent(alloc, pendingAllocations)
case structs.AllocClientStatusRunning:
handleRunningAllocationEvent(alloc, pendingAllocations, onNewAllocation, onDeletedAllocation)
case structs.AllocClientStatusFailed:
handleFailedAllocationEvent(alloc)
}
return nil
}
if alloc.ClientStatus == structs.AllocClientStatusPending && alloc.DesiredStatus == structs.AllocDesiredStatusRun {
// handlePendingAllocationEvent records a pending allocation in pendingAllocations so that
// following events for the same allocation ID can be filtered.
// The flag is only set when the allocation's DesiredStatus is structs.AllocDesiredStatusRun;
// pending allocations with any other desired status leave the map untouched.
func handlePendingAllocationEvent(alloc *nomadApi.Allocation, pendingAllocations map[string]bool) {
if alloc.DesiredStatus == structs.AllocDesiredStatusRun {
// The allocation has been started but is not running yet: remember its ID here,
// it is added to our list once it actually runs.
pendingAllocations[alloc.ID] = true
}
return nil
}
// handleRunningAllocationEvent forwards the event of a running allocation to the matching
// AllocationProcessor while filtering out repeated events.
//
// An allocation whose desired status is "stop" is handed to onDeletedAllocation.
// An allocation whose desired status is "run" is handed to onNewAllocation only if its ID
// is still present in pendingAllocations; the ID is removed from the map afterwards.
// Every other desired status is ignored.
func handleRunningAllocationEvent(alloc *nomadApi.Allocation,
	pendingAllocations map[string]bool, onNewAllocation, onDeletedAllocation AllocationProcessor) {
	desired := alloc.DesiredStatus

	if desired == structs.AllocDesiredStatusStop {
		onDeletedAllocation(alloc)
		return
	}
	if desired != structs.AllocDesiredStatusRun {
		return
	}

	// Only the first running event after the pending state marks the transition
	// from pending to running; later events no longer find the ID in the map.
	if _, wasPending := pendingAllocations[alloc.ID]; !wasPending {
		return
	}
	onNewAllocation(alloc)
	delete(pendingAllocations, alloc.ID)
}
// handleFailedAllocationEvent emits a warning for a failed allocation.
// The warning is only written when both FollowupEvalID and PreviousAllocation are empty,
// so that only one of the multiple failure events of an allocation gets logged
// (presumably the first one - verify against the event stream).
// The log entry carries the job ID, the failure reason and the allocation itself.
func handleFailedAllocationEvent(alloc *nomadApi.Allocation) {
	if alloc.FollowupEvalID != "" || alloc.PreviousAllocation != "" {
		// Not the event we want to report; stay silent.
		return
	}

	log.WithField("job", alloc.JobID).
		WithField("reason", failureDisplayMessage(alloc)).
		WithField("alloc", alloc).
		Warn("Allocation failure")
}
// failureDisplayMessage extracts the DisplayMessage of a failed allocation.
// It walks the events of all task states and returns the DisplayMessage of the first
// encountered event that has FailsTask set. If no such event exists, the empty string is returned.
func failureDisplayMessage(alloc *nomadApi.Allocation) (msg string) {
	for _, taskState := range alloc.TaskStates {
		for i := range taskState.Events {
			if taskEvent := taskState.Events[i]; taskEvent.FailsTask {
				msg = taskEvent.DisplayMessage
				return msg
			}
		}
	}
	// No failing event found: msg still holds its zero value, the empty string.
	return msg
}
// checkEvaluation checks whether the given evaluation failed.