Fix not canceling monitoring events for removed environments and runners.
This commit is contained in:
Maximilian Paß
2022-10-13 22:17:45 +01:00
committed by Sebastian Serth
parent 5d54b0f786
commit 7119f3e012
9 changed files with 59 additions and 31 deletions

View File

@@ -1,6 +1,7 @@
package runner
import (
"context"
"errors"
"fmt"
"github.com/influxdata/influxdb-client-go/v2/api/write"
@@ -22,12 +23,13 @@ type AbstractManager struct {
}
// NewAbstractManager creates a new abstract runner manager that keeps track of all runners of one kind.
// Since this manager is currently directly bound to the lifespan of Poseidon, it does not need a context cancel.
// For that reason both monitored storages are given context.Background(), which is never canceled.
func NewAbstractManager() *AbstractManager {
return &AbstractManager{
environments: storage.NewMonitoredLocalStorage[ExecutionEnvironment](
monitoring.MeasurementEnvironments, monitorEnvironmentData, 0),
monitoring.MeasurementEnvironments, monitorEnvironmentData, 0, context.Background()),
usedRunners: storage.NewMonitoredLocalStorage[Runner](
monitoring.MeasurementUsedRunner, MonitorRunnersEnvironmentID, time.Hour),
monitoring.MeasurementUsedRunner, MonitorRunnersEnvironmentID, time.Hour, context.Background()),
}
}

View File

@@ -37,6 +37,8 @@ type AWSFunctionWorkload struct {
runningExecutions map[execution.ID]context.CancelFunc
onDestroy DestroyRunnerHandler
environment ExecutionEnvironment
ctx context.Context
cancel context.CancelFunc
}
// NewAWSFunctionWorkload creates a new AWSFunctionWorkload with the provided id.
@@ -47,15 +49,18 @@ func NewAWSFunctionWorkload(
return nil, fmt.Errorf("failed generating runner id: %w", err)
}
ctx, cancel := context.WithCancel(context.Background())
workload := &AWSFunctionWorkload{
id: newUUID.String(),
fs: make(map[dto.FilePath][]byte),
runningExecutions: make(map[execution.ID]context.CancelFunc),
onDestroy: onDestroy,
environment: environment,
ctx: ctx,
cancel: cancel,
}
workload.executions = storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](
monitoring.MeasurementExecutionsAWS, monitorExecutionsRunnerID(environment.ID(), workload.id), time.Minute)
monitoring.MeasurementExecutionsAWS, monitorExecutionsRunnerID(environment.ID(), workload.id), time.Minute, ctx)
workload.InactivityTimer = NewInactivityTimer(workload, func(_ Runner) error {
return workload.Destroy()
})
@@ -92,7 +97,7 @@ func (w *AWSFunctionWorkload) ExecuteInteractively(id string, _ io.ReadWriter, s
}
hideEnvironmentVariables(request, "AWS")
request.PrivilegedExecution = true // AWS does not support multiple users at this moment.
command, ctx, cancel := prepareExecution(request)
command, ctx, cancel := prepareExecution(request, w.ctx)
exitInternal := make(chan ExitInfo)
exit := make(chan ExitInfo, 1)
@@ -131,9 +136,7 @@ func (w *AWSFunctionWorkload) GetFileContent(_ string, _ http.ResponseWriter, _
}
func (w *AWSFunctionWorkload) Destroy() error {
for _, cancel := range w.runningExecutions {
cancel()
}
w.cancel()
if err := w.onDestroy(w); err != nil {
return fmt.Errorf("error while destroying aws runner: %w", err)
}

View File

@@ -47,6 +47,8 @@ type NomadJob struct {
portMappings []nomadApi.PortMapping
api nomad.ExecutorAPI
onDestroy DestroyRunnerHandler
ctx context.Context
cancel context.CancelFunc
}
// NewNomadJob creates a new NomadJob with the provided id.
@@ -55,14 +57,17 @@ type NomadJob struct {
func NewNomadJob(id string, portMappings []nomadApi.PortMapping,
apiClient nomad.ExecutorAPI, onDestroy DestroyRunnerHandler,
) *NomadJob {
// ctx is the job-wide context: it is stored on the job, handed to the executions storage below,
// and canceled through job.cancel when Destroy is called.
// NOTE(review): presumably the storage uses it to stop its monitoring events - confirm in the storage package.
ctx, cancel := context.WithCancel(context.Background())
job := &NomadJob{
id: id,
portMappings: portMappings,
api: apiClient,
onDestroy: onDestroy,
ctx: ctx,
cancel: cancel,
}
job.executions = storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](
monitoring.MeasurementExecutionsNomad, monitorExecutionsRunnerID(job.Environment(), id), time.Minute)
monitoring.MeasurementExecutionsNomad, monitorExecutionsRunnerID(job.Environment(), id), time.Minute, ctx)
// The inactivity timer calls onDestroy directly when it fires.
job.InactivityTimer = NewInactivityTimer(job, onDestroy)
return job
}
@@ -111,10 +116,10 @@ func (r *NomadJob) ExecuteInteractively(
r.ResetTimeout()
command, ctx, cancel := prepareExecution(request)
command, ctx, cancel := prepareExecution(request, r.ctx)
exitInternal := make(chan ExitInfo)
exit := make(chan ExitInfo, 1)
ctxExecute, cancelExecute := context.WithCancel(context.Background())
ctxExecute, cancelExecute := context.WithCancel(r.ctx)
go r.executeCommand(ctxExecute, command, request.PrivilegedExecution, stdin, stdout, stderr, exitInternal)
go r.handleExitOrContextDone(ctx, cancelExecute, exitInternal, exit, stdin)
@@ -203,20 +208,21 @@ func (r *NomadJob) GetFileContent(
}
// Destroy cancels the runner's context and then passes the runner to its onDestroy handler.
// An error returned by the handler is wrapped and returned to the caller.
func (r *NomadJob) Destroy() error {
	// Cancel first so that everything derived from the runner context stops before the handler runs.
	r.cancel()

	destroyErr := r.onDestroy(r)
	if destroyErr == nil {
		return nil
	}
	return fmt.Errorf("error while destroying runner: %w", destroyErr)
}
// prepareExecution returns the full command of the request together with a context and its cancel
// function for running that command.
// The context is derived from environmentCtx, so canceling environmentCtx also cancels the execution.
// With a TimeLimit of 0 the context is only cancelable; otherwise it additionally times out after
// TimeLimit seconds.
// Note: this diff hunk also shows the replaced variant of the signature and of both branches, which
// derived the context from context.Background().
func prepareExecution(request *dto.ExecutionRequest) (
func prepareExecution(request *dto.ExecutionRequest, environmentCtx context.Context) (
command []string, ctx context.Context, cancel context.CancelFunc,
) {
command = request.FullCommand()
if request.TimeLimit == 0 {
ctx, cancel = context.WithCancel(context.Background())
ctx, cancel = context.WithCancel(environmentCtx)
} else {
ctx, cancel = context.WithTimeout(context.Background(), time.Duration(request.TimeLimit)*time.Second)
ctx, cancel = context.WithTimeout(environmentCtx, time.Duration(request.TimeLimit)*time.Second)
}
return command, ctx, cancel
}

View File

@@ -127,6 +127,7 @@ func (s *ExecuteInteractivelyTestSuite) SetupTest() {
id: tests.DefaultRunnerID,
api: s.apiMock,
onDestroy: s.manager.Return,
ctx: context.Background(),
}
}
@@ -207,6 +208,7 @@ func (s *ExecuteInteractivelyTestSuite) TestDestroysRunnerAfterTimeoutAndSignal(
})
timeLimit := 1
executionRequest := &dto.ExecutionRequest{TimeLimit: timeLimit}
s.runner.cancel = func() {}
s.runner.StoreExecution(defaultExecutionID, executionRequest)
_, _, err := s.runner.ExecuteInteractively(defaultExecutionID, bytes.NewBuffer(make([]byte, 1)), nil, nil)
s.Require().NoError(err)