From 3d252492fe23f1b42830aab9c4d91317cabb4937 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Pa=C3=9F?= <22845248+mpass99@users.noreply.github.com> Date: Sun, 17 Sep 2023 20:03:33 +0200 Subject: [PATCH] Fix rescheduled used runners being removed. As they have already been rescheduled, and therefore recreated, they do not need to be removed but can be handled as new runners. --- internal/runner/nomad_runner.go | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/internal/runner/nomad_runner.go b/internal/runner/nomad_runner.go index 014888d..bb4c1bc 100644 --- a/internal/runner/nomad_runner.go +++ b/internal/runner/nomad_runner.go @@ -246,23 +246,30 @@ func (r *NomadJob) Destroy(reason DestroyReason) (err error) { r.StopTimeout() if r.onDestroy != nil { err = r.onDestroy(r) + if err != nil { + log.WithContext(r.ctx).WithError(err).Warn("runner onDestroy callback failed") + } } - if err == nil && !errors.Is(reason, ErrOOMKilled) { - err = util.RetryExponential(func() (err error) { - if err = r.api.DeleteJob(r.ID()); err != nil { - err = fmt.Errorf("error deleting runner in Nomad: %w", err) - } - return - }) + // local determines if a reason is present that the runner should only be removed locally (without requesting Nomad). 
+ local := errors.Is(reason, nomad.ErrorAllocationRescheduled) || + errors.Is(reason, ErrOOMKilled) + if local { + log.WithContext(r.ctx).Debug("Runner destroyed locally") + return nil } + err = util.RetryExponential(func() (err error) { + if err = r.api.DeleteJob(r.ID()); err != nil { + err = fmt.Errorf("error deleting runner in Nomad: %w", err) + } + return + }) if err != nil { - err = fmt.Errorf("cannot destroy runner: %w", err) - } else { - log.WithContext(r.ctx).Trace("Runner destroyed") + return fmt.Errorf("cannot destroy runner: %w", err) } - return err + log.WithContext(r.ctx).Trace("Runner destroyed") + return nil } func prepareExecution(request *dto.ExecutionRequest, environmentCtx context.Context) (