diff --git a/internal/api/runners.go b/internal/api/runners.go index 9f32cd6..f271290 100644 --- a/internal/api/runners.go +++ b/internal/api/runners.go @@ -76,9 +76,7 @@ func (r *RunnerController) provide(writer http.ResponseWriter, request *http.Req case errors.Is(err, runner.ErrUnknownExecutionEnvironment): writeClientError(writer, err, http.StatusNotFound, request.Context()) case errors.Is(err, runner.ErrNoRunnersAvailable): - log.WithContext(request.Context()). - WithField("environment", logging.RemoveNewlineSymbol(strconv.Itoa(int(environmentID)))). - Warn("No runners available") + log.WithContext(request.Context()).Warn("No runners available") writeInternalServerError(writer, err, dto.ErrorNomadOverload, request.Context()) default: writeInternalServerError(writer, err, dto.ErrorUnknown, request.Context()) @@ -238,6 +236,8 @@ func (r *RunnerController) findRunnerMiddleware(next http.Handler) http.Handler return } ctx := runner.NewContext(request.Context(), targetRunner) + ctx = context.WithValue(ctx, dto.ContextKey(dto.KeyRunnerID), targetRunner.ID()) + ctx = context.WithValue(ctx, dto.ContextKey(dto.KeyEnvironmentID), targetRunner.Environment().ToString()) requestWithRunner := request.WithContext(ctx) next.ServeHTTP(writer, requestWithRunner) }) diff --git a/internal/api/websocket.go b/internal/api/websocket.go index bced43a..c44932d 100644 --- a/internal/api/websocket.go +++ b/internal/api/websocket.go @@ -99,7 +99,7 @@ func (r *RunnerController) connectToRunner(writer http.ResponseWriter, request * defer cancelProxy() proxy := newWebSocketProxy(connection, proxyCtx) - log.WithContext(proxyCtx).WithField("runnerId", targetRunner.ID()). + log.WithContext(proxyCtx). WithField("executionID", logging.RemoveNewlineSymbol(executionID)). Info("Running execution") logging.StartSpan("api.runner.connect", "Execute Interactively", request.Context(), func(ctx context.Context) { diff --git a/internal/environment/nomad_environment.go b/internal/environment/nomad_environment.go index 50d213d..4da8b9b 100644 --- a/internal/environment/nomad_environment.go +++ b/internal/environment/nomad_environment.go @@ -249,11 +249,12 @@ func (n *NomadEnvironment) Sample() (runner.Runner, bool) { return n.createRunner(false) }) if err != nil { - log.WithError(err).WithField("environmentID", n.ID()).Error("Couldn't create new runner for claimed one") + log.WithError(err).WithField(dto.KeyEnvironmentID, n.ID().ToString()). + Error("Couldn't create new runner for claimed one") } }() } else if ok { - log.WithField("environment", n.ID().ToString()).Warn("Too many idle runner") + log.WithField(dto.KeyEnvironmentID, n.ID().ToString()).Warn("Too many idle runner") } return r, ok } @@ -331,7 +332,7 @@ func parseJob(jobHCL string) (*nomadApi.Job, error) { } func (n *NomadEnvironment) createRunners(count uint, forcePull bool) error { - log.WithField("runnersRequired", count).WithField("id", n.ID()).Debug("Creating new runners") + log.WithField("runnersRequired", count).WithField(dto.KeyEnvironmentID, n.ID()).Debug("Creating new runners") for i := 0; i < int(count); i++ { err := n.createRunner(forcePull) if err != nil { diff --git a/internal/nomad/api_querier.go b/internal/nomad/api_querier.go index 4f47cf6..5c218ce 100644 --- a/internal/nomad/api_querier.go +++ b/internal/nomad/api_querier.go @@ -132,10 +132,10 @@ func (nc *nomadAPIClient) Execute(runnerID string, case err == nil: return exitCode, nil case websocket.IsCloseError(errors.Unwrap(err), websocket.CloseNormalClosure): - log.WithContext(ctx).WithField("runnerID", runnerID).WithError(err).Info("The exit code could not be received.") + log.WithContext(ctx).WithError(err).Info("The exit code could not be received.") return 0, nil case errors.Is(err, context.Canceled): - log.WithContext(ctx).WithField("runnerID", runnerID).Debug("Execution canceled by context") + log.WithContext(ctx).Debug("Execution canceled by context") return 0, nil default: return 1, fmt.Errorf("error executing command in allocation: %w", err) diff --git a/internal/nomad/nomad.go b/internal/nomad/nomad.go index d85fc0f..5a0feda 100644 --- a/internal/nomad/nomad.go +++ b/internal/nomad/nomad.go @@ -596,7 +596,7 @@ func (a *APIClient) executeCommandInteractivelyWithStderr(allocationID string, c exit, err := a.Execute(allocationID, ctx, prepareCommandTTYStdErr(currentNanoTime, privilegedExecution), true, nullio.Reader{Ctx: readingContext}, stderr, io.Discard) if err != nil { - log.WithContext(ctx).WithError(err).WithField("runner", allocationID).Warn("Stderr task finished with error") + log.WithContext(ctx).WithError(err).Warn("Stderr task finished with error") } stderrExitChan <- exit }) diff --git a/internal/runner/inactivity_timer.go b/internal/runner/inactivity_timer.go index 689b53a..7d2b07d 100644 --- a/internal/runner/inactivity_timer.go +++ b/internal/runner/inactivity_timer.go @@ -2,6 +2,7 @@ package runner import ( "errors" + "github.com/openHPI/poseidon/pkg/dto" "sync" "time" ) @@ -70,9 +71,10 @@ func (t *InactivityTimerImplementation) SetupTimeout(duration time.Duration) { t.mu.Unlock() err := t.onDestroy(t.runner) if err != nil { - log.WithError(err).WithField("id", t.runner.ID()).Warn("Returning runner after inactivity caused an error") + log.WithError(err).WithField(dto.KeyRunnerID, t.runner.ID()). + Warn("Returning runner after inactivity caused an error") } else { - log.WithField("id", t.runner.ID()).Info("Returning runner due to inactivity timeout") + log.WithField(dto.KeyRunnerID, t.runner.ID()).Info("Returning runner due to inactivity timeout") } }) } diff --git a/internal/runner/nomad_manager.go b/internal/runner/nomad_manager.go index 401ab31..8e5afbd 100644 --- a/internal/runner/nomad_manager.go +++ b/internal/runner/nomad_manager.go @@ -64,7 +64,7 @@ func (m *NomadRunnerManager) markRunnerAsUsed(runner Runner, timeoutDuration int if err != nil { err := m.Return(runner) if err != nil { - log.WithError(err).WithField("runnerID", runner.ID()).Error("can't mark runner as used and can't return runner") + log.WithError(err).WithField(dto.KeyRunnerID, runner.ID()).Error("can't mark runner as used and can't return runner") } } } @@ -86,7 +86,7 @@ func (m *NomadRunnerManager) Return(r Runner) error { func (m *NomadRunnerManager) Load() { for _, environment := range m.environments.List() { - environmentLogger := log.WithField("environmentID", environment.ID()) + environmentLogger := log.WithField(dto.KeyEnvironmentID, environment.ID().ToString()) runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID()) if err != nil { environmentLogger.WithError(err).Warn("Error fetching the runner jobs") @@ -115,7 +115,7 @@ func (m *NomadRunnerManager) loadSingleJob(job *nomadApi.Job, environmentLogger return } newJob := NewNomadJob(*job.ID, portMappings, m.apiClient, m.Return) - log.WithField("isUsed", isUsed).WithField("runner_id", newJob.ID()).Debug("Recovered Runner") + log.WithField("isUsed", isUsed).WithField(dto.KeyRunnerID, newJob.ID()).Debug("Recovered Runner") if isUsed { m.usedRunners.Add(newJob.ID(), newJob) timeout, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaTimeoutKey]) @@ -141,14 +141,15 @@ func (m *NomadRunnerManager) keepRunnersSynced(ctx context.Context) { } func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation, startup time.Duration) { - log.WithField("id", alloc.JobID).WithField("startupDuration", startup).Debug("Runner started") + log.WithField(dto.KeyRunnerID, alloc.JobID).WithField("startupDuration", startup).Debug("Runner started") if nomad.IsEnvironmentTemplateID(alloc.JobID) { return } if _, ok := m.usedRunners.Get(alloc.JobID); ok { - log.WithField("id", alloc.JobID).WithField("states", alloc.TaskStates).Error("Started Runner is already in use") + log.WithField(dto.KeyRunnerID, alloc.JobID).WithField("states", alloc.TaskStates). + Error("Started Runner is already in use") return } @@ -178,7 +179,7 @@ func monitorAllocationStartupDuration(startup time.Duration, runnerID string, en } func (m *NomadRunnerManager) onAllocationStopped(runnerID string) (alreadyRemoved bool) { - log.WithField("id", runnerID).Debug("Runner stopped") + log.WithField(dto.KeyRunnerID, runnerID).Debug("Runner stopped") if nomad.IsEnvironmentTemplateID(runnerID) { return false diff --git a/internal/runner/nomad_runner.go b/internal/runner/nomad_runner.go index 287f2bd..068a47f 100644 --- a/internal/runner/nomad_runner.go +++ b/internal/runner/nomad_runner.go @@ -201,7 +201,7 @@ func (r *NomadJob) GetFileContent( p.AddTag(monitoring.InfluxKeyRunnerID, r.ID()) environmentID, err := nomad.EnvironmentIDFromRunnerID(r.ID()) if err != nil { - log.WithContext(ctx).WithField("runnerID", r.ID()).WithError(err).Warn("can not parse environment id") + log.WithContext(ctx).WithError(err).Warn("can not parse environment id") } p.AddTag(monitoring.InfluxKeyEnvironmentID, environmentID.ToString()) defer contentLengthWriter.SendMonitoringData(p) @@ -283,16 +283,16 @@ func (r *NomadJob) handleExitOrContextDone(ctx context.Context, cancelExecute co // log.WithField("runner", r.id).Warn("Could not send SIGQUIT because nothing was written") // } if err != nil { - log.WithContext(ctx).WithField("runner", r.id).WithError(err).Warn("Could not send SIGQUIT due to error") + log.WithContext(ctx).WithError(err).Warn("Could not send SIGQUIT due to error") } select { case <-exitInternal: - log.WithContext(ctx).WithField("runner", r.id).Debug("Execution terminated after SIGQUIT") + log.WithContext(ctx).Debug("Execution terminated after SIGQUIT") case <-time.After(executionTimeoutGracePeriod): - log.WithContext(ctx).WithField("runner", r.id).Info("Execution did not quit after SIGQUIT") + log.WithContext(ctx).Info("Execution did not quit after SIGQUIT") if err := r.Destroy(); err != nil { - log.WithContext(ctx).WithField("runner", r.id).Error("Error when destroying runner") + log.WithContext(ctx).Error("Error when destroying runner") } } } diff --git a/pkg/dto/dto.go b/pkg/dto/dto.go index 553cb5c..b94cc44 100644 --- a/pkg/dto/dto.go +++ b/pkg/dto/dto.go @@ -184,6 +184,12 @@ func (f File) ByteContent() []byte { // ContextKey is the type for keys in a request context that is used for passing data to the next handler. type ContextKey string +// Keys to reference information (for logging or monitoring). +const ( + KeyRunnerID = "runner_id" + KeyEnvironmentID = "environment_id" +) + // WebSocketMessageType is the type for the messages from Poseidon to the client. type WebSocketMessageType string diff --git a/pkg/logging/sentry_hook.go b/pkg/logging/sentry_hook.go index 0914b76..bb3d656 100644 --- a/pkg/logging/sentry_hook.go +++ b/pkg/logging/sentry_hook.go @@ -3,6 +3,7 @@ package logging import ( "context" "github.com/getsentry/sentry-go" + "github.com/openHPI/poseidon/pkg/dto" "github.com/sirupsen/logrus" ) @@ -28,6 +29,9 @@ func (hook *SentryHook) Fire(entry *logrus.Entry) error { var hub *sentry.Hub if entry.Context != nil { hub = sentry.GetHubFromContext(entry.Context) + // This might overwrite valid data when not the request context is passed. + entry.Data[dto.KeyRunnerID] = entry.Context.Value(dto.ContextKey(dto.KeyRunnerID)) + entry.Data[dto.KeyEnvironmentID] = entry.Context.Value(dto.ContextKey(dto.KeyEnvironmentID)) } if hub == nil { hub = sentry.CurrentHub() diff --git a/pkg/monitoring/influxdb2_middleware.go b/pkg/monitoring/influxdb2_middleware.go index b60e57a..bda87f4 100644 --- a/pkg/monitoring/influxdb2_middleware.go +++ b/pkg/monitoring/influxdb2_middleware.go @@ -34,8 +34,8 @@ const ( // The keys for the monitored tags and fields. - InfluxKeyRunnerID = "runner_id" - InfluxKeyEnvironmentID = "environment_id" + InfluxKeyRunnerID = dto.KeyRunnerID + InfluxKeyEnvironmentID = dto.KeyEnvironmentID InfluxKeyJobID = "job_id" InfluxKeyClientStatus = "client_status" InfluxKeyNomadNode = "nomad_agent"