Refactor Nomad Recovery

from an approach that loaded the runners only once at the startup
to a method that will be repeated i.e. if the Nomad Event Stream connection interrupts.
This commit is contained in:
Maximilian Paß
2023-10-23 14:36:14 +02:00
committed by Sebastian Serth
parent b2898f9183
commit 6b69a2d732
22 changed files with 211 additions and 120 deletions

View File

@@ -27,8 +27,9 @@ var (
ErrorPlacingAllocations = errors.New("failed to place all allocations")
ErrorLoadingJob = errors.New("failed to load job")
ErrorNoAllocatedResourcesFound = errors.New("no allocated resources found")
ErrorOOMKilled RunnerDeletedReason = errors.New("the allocation was OOM Killed")
ErrorAllocationRescheduled RunnerDeletedReason = errors.New("the allocation was rescheduled")
ErrorLocalDestruction RunnerDeletedReason = errors.New("the destruction should not cause external changes")
ErrorOOMKilled RunnerDeletedReason = fmt.Errorf("%s: %w", dto.ErrOOMKilled.Error(), ErrorLocalDestruction)
ErrorAllocationRescheduled RunnerDeletedReason = fmt.Errorf("the allocation was rescheduled: %w", ErrorLocalDestruction)
ErrorAllocationStopped RunnerDeletedReason = errors.New("the allocation was stopped")
ErrorAllocationStoppedUnexpectedly RunnerDeletedReason = fmt.Errorf("%w unexpectedly", ErrorAllocationStopped)
ErrorAllocationRescheduledUnexpectedly RunnerDeletedReason = fmt.Errorf(