Refactor Nomad Recovery

from an approach that loaded the runners only once at startup to one that is repeated whenever needed, i.e., each time the Nomad Event Stream connection is interrupted.
2023-10-23 14:36:14 +02:00
parent b2898f9183
commit 6b69a2d732
22 changed files with 211 additions and 120 deletions
--- a/internal/nomad/nomad.go
+++ b/internal/nomad/nomad.go
@@ -27,8 +27,9 @@ var (
 	ErrorPlacingAllocations                                    = errors.New("failed to place all allocations")
 	ErrorLoadingJob                                            = errors.New("failed to load job")
 	ErrorNoAllocatedResourcesFound                             = errors.New("no allocated resources found")
-	ErrorOOMKilled                         RunnerDeletedReason = errors.New("the allocation was OOM Killed")
-	ErrorAllocationRescheduled             RunnerDeletedReason = errors.New("the allocation was rescheduled")
+	ErrorLocalDestruction                  RunnerDeletedReason = errors.New("the destruction should not cause external changes")
+	ErrorOOMKilled                         RunnerDeletedReason = fmt.Errorf("%s: %w", dto.ErrOOMKilled.Error(), ErrorLocalDestruction)
+	ErrorAllocationRescheduled             RunnerDeletedReason = fmt.Errorf("the allocation was rescheduled: %w", ErrorLocalDestruction)
 	ErrorAllocationStopped                 RunnerDeletedReason = errors.New("the allocation was stopped")
 	ErrorAllocationStoppedUnexpectedly     RunnerDeletedReason = fmt.Errorf("%w unexpectedly", ErrorAllocationStopped)
 	ErrorAllocationRescheduledUnexpectedly RunnerDeletedReason = fmt.Errorf(