Fix monitoring events not being canceled for removed environments
and runners.
This commit is contained in:

committed by
Sebastian Serth

parent
5d54b0f786
commit
7119f3e012
@@ -1,6 +1,7 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/influxdata/influxdb-client-go/v2/api/write"
|
||||
@@ -22,12 +23,13 @@ type AbstractManager struct {
|
||||
}
|
||||
|
||||
// NewAbstractManager creates a new abstract runner manager that keeps track of all runners of one kind.
|
||||
// Since this manager is currently directly bound to the lifespan of Poseidon, it does not need a context cancel.
|
||||
func NewAbstractManager() *AbstractManager {
|
||||
return &AbstractManager{
|
||||
environments: storage.NewMonitoredLocalStorage[ExecutionEnvironment](
|
||||
monitoring.MeasurementEnvironments, monitorEnvironmentData, 0),
|
||||
monitoring.MeasurementEnvironments, monitorEnvironmentData, 0, context.Background()),
|
||||
usedRunners: storage.NewMonitoredLocalStorage[Runner](
|
||||
monitoring.MeasurementUsedRunner, MonitorRunnersEnvironmentID, time.Hour),
|
||||
monitoring.MeasurementUsedRunner, MonitorRunnersEnvironmentID, time.Hour, context.Background()),
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -37,6 +37,8 @@ type AWSFunctionWorkload struct {
|
||||
runningExecutions map[execution.ID]context.CancelFunc
|
||||
onDestroy DestroyRunnerHandler
|
||||
environment ExecutionEnvironment
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
// NewAWSFunctionWorkload creates a new AWSFunctionWorkload with the provided id.
|
||||
@@ -47,15 +49,18 @@ func NewAWSFunctionWorkload(
|
||||
return nil, fmt.Errorf("failed generating runner id: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
workload := &AWSFunctionWorkload{
|
||||
id: newUUID.String(),
|
||||
fs: make(map[dto.FilePath][]byte),
|
||||
runningExecutions: make(map[execution.ID]context.CancelFunc),
|
||||
onDestroy: onDestroy,
|
||||
environment: environment,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
}
|
||||
workload.executions = storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](
|
||||
monitoring.MeasurementExecutionsAWS, monitorExecutionsRunnerID(environment.ID(), workload.id), time.Minute)
|
||||
monitoring.MeasurementExecutionsAWS, monitorExecutionsRunnerID(environment.ID(), workload.id), time.Minute, ctx)
|
||||
workload.InactivityTimer = NewInactivityTimer(workload, func(_ Runner) error {
|
||||
return workload.Destroy()
|
||||
})
|
||||
@@ -92,7 +97,7 @@ func (w *AWSFunctionWorkload) ExecuteInteractively(id string, _ io.ReadWriter, s
|
||||
}
|
||||
hideEnvironmentVariables(request, "AWS")
|
||||
request.PrivilegedExecution = true // AWS does not support multiple users at this moment.
|
||||
command, ctx, cancel := prepareExecution(request)
|
||||
command, ctx, cancel := prepareExecution(request, w.ctx)
|
||||
exitInternal := make(chan ExitInfo)
|
||||
exit := make(chan ExitInfo, 1)
|
||||
|
||||
@@ -131,9 +136,7 @@ func (w *AWSFunctionWorkload) GetFileContent(_ string, _ http.ResponseWriter, _
|
||||
}
|
||||
|
||||
func (w *AWSFunctionWorkload) Destroy() error {
|
||||
for _, cancel := range w.runningExecutions {
|
||||
cancel()
|
||||
}
|
||||
w.cancel()
|
||||
if err := w.onDestroy(w); err != nil {
|
||||
return fmt.Errorf("error while destroying aws runner: %w", err)
|
||||
}
|
||||
|
@@ -47,6 +47,8 @@ type NomadJob struct {
|
||||
portMappings []nomadApi.PortMapping
|
||||
api nomad.ExecutorAPI
|
||||
onDestroy DestroyRunnerHandler
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
}
|
||||
|
||||
// NewNomadJob creates a new NomadJob with the provided id.
|
||||
@@ -55,14 +57,17 @@ type NomadJob struct {
|
||||
func NewNomadJob(id string, portMappings []nomadApi.PortMapping,
|
||||
apiClient nomad.ExecutorAPI, onDestroy DestroyRunnerHandler,
|
||||
) *NomadJob {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
job := &NomadJob{
|
||||
id: id,
|
||||
portMappings: portMappings,
|
||||
api: apiClient,
|
||||
onDestroy: onDestroy,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
}
|
||||
job.executions = storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](
|
||||
monitoring.MeasurementExecutionsNomad, monitorExecutionsRunnerID(job.Environment(), id), time.Minute)
|
||||
monitoring.MeasurementExecutionsNomad, monitorExecutionsRunnerID(job.Environment(), id), time.Minute, ctx)
|
||||
job.InactivityTimer = NewInactivityTimer(job, onDestroy)
|
||||
return job
|
||||
}
|
||||
@@ -111,10 +116,10 @@ func (r *NomadJob) ExecuteInteractively(
|
||||
|
||||
r.ResetTimeout()
|
||||
|
||||
command, ctx, cancel := prepareExecution(request)
|
||||
command, ctx, cancel := prepareExecution(request, r.ctx)
|
||||
exitInternal := make(chan ExitInfo)
|
||||
exit := make(chan ExitInfo, 1)
|
||||
ctxExecute, cancelExecute := context.WithCancel(context.Background())
|
||||
ctxExecute, cancelExecute := context.WithCancel(r.ctx)
|
||||
|
||||
go r.executeCommand(ctxExecute, command, request.PrivilegedExecution, stdin, stdout, stderr, exitInternal)
|
||||
go r.handleExitOrContextDone(ctx, cancelExecute, exitInternal, exit, stdin)
|
||||
@@ -203,20 +208,21 @@ func (r *NomadJob) GetFileContent(
|
||||
}
|
||||
|
||||
func (r *NomadJob) Destroy() error {
|
||||
r.cancel()
|
||||
if err := r.onDestroy(r); err != nil {
|
||||
return fmt.Errorf("error while destroying runner: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareExecution(request *dto.ExecutionRequest) (
|
||||
func prepareExecution(request *dto.ExecutionRequest, environmentCtx context.Context) (
|
||||
command []string, ctx context.Context, cancel context.CancelFunc,
|
||||
) {
|
||||
command = request.FullCommand()
|
||||
if request.TimeLimit == 0 {
|
||||
ctx, cancel = context.WithCancel(context.Background())
|
||||
ctx, cancel = context.WithCancel(environmentCtx)
|
||||
} else {
|
||||
ctx, cancel = context.WithTimeout(context.Background(), time.Duration(request.TimeLimit)*time.Second)
|
||||
ctx, cancel = context.WithTimeout(environmentCtx, time.Duration(request.TimeLimit)*time.Second)
|
||||
}
|
||||
return command, ctx, cancel
|
||||
}
|
||||
|
@@ -127,6 +127,7 @@ func (s *ExecuteInteractivelyTestSuite) SetupTest() {
|
||||
id: tests.DefaultRunnerID,
|
||||
api: s.apiMock,
|
||||
onDestroy: s.manager.Return,
|
||||
ctx: context.Background(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -207,6 +208,7 @@ func (s *ExecuteInteractivelyTestSuite) TestDestroysRunnerAfterTimeoutAndSignal(
|
||||
})
|
||||
timeLimit := 1
|
||||
executionRequest := &dto.ExecutionRequest{TimeLimit: timeLimit}
|
||||
s.runner.cancel = func() {}
|
||||
s.runner.StoreExecution(defaultExecutionID, executionRequest)
|
||||
_, _, err := s.runner.ExecuteInteractively(defaultExecutionID, bytes.NewBuffer(make([]byte, 1)), nil, nil)
|
||||
s.Require().NoError(err)
|
||||
|
Reference in New Issue
Block a user