Remove usage of context.DeadlineExceeded
for internal decisions, as this error is widely used by other packages. Checking for such wrapped errors means a foreign error can accidentally influence an internal decision. In this case, the retry mechanism checked whether the error was context.DeadlineExceeded and assumed it had been created by the internal context. This assumption was wrong.
committed by Sebastian Serth
parent 6b69a2d732
commit d0dd5c08cb
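The underlying pitfall: errors.Is matches context.DeadlineExceeded through any level of wrapping, so an expired context inside a dependency looks identical to our own deadline. A minimal, self-contained sketch of the failure mode and of the local check this commit switches to (not Poseidon code; names are illustrative):

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

// doWork stands in for a dependency that uses its own context internally
// and returns its deadline error wrapped.
func doWork() error {
	inner, cancel := context.WithTimeout(context.Background(), time.Nanosecond)
	defer cancel()
	<-inner.Done()
	return fmt.Errorf("dependency failed: %w", inner.Err())
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	err := doWork()

	// Fragile: this matches even though OUR ctx never expired.
	if errors.Is(err, context.DeadlineExceeded) {
		fmt.Println("wrongly treated as our own timeout")
	}

	// Robust: ask the local context directly, as this commit does.
	if err != nil && ctx.Err() == nil {
		fmt.Println("failure did not come from our context:", err)
	}
}
```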
@@ -31,7 +31,10 @@ const (
 	TimerExpired TimerState = 2
 )
 
-var ErrorRunnerInactivityTimeout DestroyReason = errors.New("runner inactivity timeout exceeded")
+var (
+	ErrorRunnerInactivityTimeout DestroyReason = errors.New("runner inactivity timeout exceeded")
+	ErrorExecutionTimeout        = errors.New("execution timeout exceeded")
+)
 
 type InactivityTimerImplementation struct {
 	timer *time.Timer
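Grouping both sentinels in one var block lets callers tell the two timeouts apart with errors.Is instead of probing context errors. A hedged caller-side sketch (handleRunnerError and its log messages are assumptions, not part of this commit):

```go
// handleRunnerError is a hypothetical helper showing how a caller
// could distinguish the two sentinel errors.
func handleRunnerError(err error) {
	switch {
	case errors.Is(err, ErrorRunnerInactivityTimeout):
		log.Info("runner was destroyed after being idle for too long")
	case errors.Is(err, ErrorExecutionTimeout):
		log.Info("execution exceeded its time limit")
	default:
		log.WithError(err).Warn("runner failed for another reason")
	}
}
```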
@@ -79,16 +79,17 @@ func (m *NomadRunnerManager) Return(r Runner) error {
 
+// SynchronizeRunners loads all runners and keeps them synchronized (without a retry mechanism).
 func (m *NomadRunnerManager) SynchronizeRunners(ctx context.Context) error {
 	// Load Runners
 	log.Info("Loading runners")
 	if err := m.load(); err != nil {
 		return fmt.Errorf("failed loading runners: %w", err)
 	}
 
 	// Watch for changes regarding the existing or new runners.
 	log.Info("Watching Event Stream")
 	err := m.apiClient.WatchEventStream(ctx,
 		&nomad.AllocationProcessing{OnNew: m.onAllocationAdded, OnDeleted: m.onAllocationStopped})
 
-	if err != nil && !(errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled)) {
+	if err != nil && ctx.Err() == nil {
 		err = fmt.Errorf("nomad Event Stream failed!: %w", err)
 	}
 	return err
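Dropping the errors.Is checks means SynchronizeRunners no longer has to guess where a context error came from: if its own ctx is still alive, any error must be a genuine stream failure and gets wrapped; otherwise the plain context error is handed back. A sketch of how an outer retry loop might drive the function under this contract (the loop and back-off are assumptions; the commit itself removes the in-function retry):

```go
// synchronizeWithRetry is a hypothetical caller: it retries stream failures
// and stops cleanly once its own context is cancelled or has expired.
func synchronizeWithRetry(ctx context.Context, m *NomadRunnerManager) {
	for ctx.Err() == nil {
		if err := m.SynchronizeRunners(ctx); err != nil {
			log.WithError(err).Warn("synchronizing runners failed, retrying")
			time.Sleep(time.Second) // back-off interval is an assumption
		}
	}
}
```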
@@ -97,7 +98,6 @@ func (m *NomadRunnerManager) SynchronizeRunners(ctx context.Context) error {
 // Load recovers all runners for all existing environments.
 func (m *NomadRunnerManager) load() error {
 	newUsedRunners := storage.NewLocalStorage[Runner]()
-
 	for _, environment := range m.environments.List() {
 		environmentLogger := log.WithField(dto.KeyEnvironmentID, environment.ID().ToString())
 
@@ -342,6 +342,9 @@ func (r *NomadJob) handleExit(exitInfo ExitInfo, exitInternal <-chan ExitInfo, e
 func (r *NomadJob) handleContextDone(exitInternal <-chan ExitInfo, exit chan<- ExitInfo,
 	stdin io.ReadWriter, ctx context.Context) {
 	err := ctx.Err()
+	if errors.Is(err, context.DeadlineExceeded) {
+		err = ErrorExecutionTimeout
+	} // for errors.Is(err, context.Canceled) the user likely disconnected from the execution.
 	if reason, ok := r.ctx.Value(destroyReasonContextKey).(error); ok {
 		err = reason
 		if r.TimeoutPassed() && !errors.Is(err, ErrorRunnerInactivityTimeout) {
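handleContextDone now translates a plain context.DeadlineExceeded from its execution context into ErrorExecutionTimeout, while a destroy reason stored on the runner's context takes precedence. A hedged sketch of that context-value pattern (the key type and both helpers are assumptions mirroring the names in the diff, not the commit's wiring):

```go
type contextKey string

const destroyReasonContextKey contextKey = "destroyReason"

// markDestroyed attaches the reason for a runner's destruction to its context.
func markDestroyed(ctx context.Context, reason error) context.Context {
	return context.WithValue(ctx, destroyReasonContextKey, reason)
}

// destroyReason reads the attached reason back, falling back to the
// plain context error when none was stored.
func destroyReason(ctx context.Context) error {
	if reason, ok := ctx.Value(destroyReasonContextKey).(error); ok {
		return reason // e.g. ErrorRunnerInactivityTimeout
	}
	return ctx.Err()
}
```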