Always log Runner and Environment ID.

Systematically log the runner ID and the environment ID by adding this information in the findRunnerMiddleware.
2023-07-14 18:17:42 +02:00
parent 0bfef5e105
commit e7df777db4
11 changed files with 39 additions and 25 deletions
--- a/internal/runner/inactivity_timer.go
+++ b/internal/runner/inactivity_timer.go
@@ -2,6 +2,7 @@ package runner

 import (
 	"errors"
+	"github.com/openHPI/poseidon/pkg/dto"
 	"sync"
 	"time"
 )
@@ -70,9 +71,10 @@ func (t *InactivityTimerImplementation) SetupTimeout(duration time.Duration) {
 		t.mu.Unlock()
 		err := t.onDestroy(t.runner)
 		if err != nil {
-			log.WithError(err).WithField("id", t.runner.ID()).Warn("Returning runner after inactivity caused an error")
+			log.WithError(err).WithField(dto.KeyRunnerID, t.runner.ID()).
+				Warn("Returning runner after inactivity caused an error")
 		} else {
-			log.WithField("id", t.runner.ID()).Info("Returning runner due to inactivity timeout")
+			log.WithField(dto.KeyRunnerID, t.runner.ID()).Info("Returning runner due to inactivity timeout")
 		}
 	})
 }
--- a/internal/runner/nomad_manager.go
+++ b/internal/runner/nomad_manager.go
@@ -64,7 +64,7 @@ func (m *NomadRunnerManager) markRunnerAsUsed(runner Runner, timeoutDuration int
 	if err != nil {
 		err := m.Return(runner)
 		if err != nil {
-			log.WithError(err).WithField("runnerID", runner.ID()).Error("can't mark runner as used and can't return runner")
+			log.WithError(err).WithField(dto.KeyRunnerID, runner.ID()).Error("can't mark runner as used and can't return runner")
 		}
 	}
 }
@@ -86,7 +86,7 @@ func (m *NomadRunnerManager) Return(r Runner) error {

 func (m *NomadRunnerManager) Load() {
 	for _, environment := range m.environments.List() {
-		environmentLogger := log.WithField("environmentID", environment.ID())
+		environmentLogger := log.WithField(dto.KeyEnvironmentID, environment.ID().ToString())
 		runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID())
 		if err != nil {
 			environmentLogger.WithError(err).Warn("Error fetching the runner jobs")
@@ -115,7 +115,7 @@ func (m *NomadRunnerManager) loadSingleJob(job *nomadApi.Job, environmentLogger
 		return
 	}
 	newJob := NewNomadJob(*job.ID, portMappings, m.apiClient, m.Return)
-	log.WithField("isUsed", isUsed).WithField("runner_id", newJob.ID()).Debug("Recovered Runner")
+	log.WithField("isUsed", isUsed).WithField(dto.KeyRunnerID, newJob.ID()).Debug("Recovered Runner")
 	if isUsed {
 		m.usedRunners.Add(newJob.ID(), newJob)
 		timeout, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaTimeoutKey])
@@ -141,14 +141,15 @@ func (m *NomadRunnerManager) keepRunnersSynced(ctx context.Context) {
 }

 func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation, startup time.Duration) {
-	log.WithField("id", alloc.JobID).WithField("startupDuration", startup).Debug("Runner started")
+	log.WithField(dto.KeyRunnerID, alloc.JobID).WithField("startupDuration", startup).Debug("Runner started")

 	if nomad.IsEnvironmentTemplateID(alloc.JobID) {
 		return
 	}

 	if _, ok := m.usedRunners.Get(alloc.JobID); ok {
-		log.WithField("id", alloc.JobID).WithField("states", alloc.TaskStates).Error("Started Runner is already in use")
+		log.WithField(dto.KeyRunnerID, alloc.JobID).WithField("states", alloc.TaskStates).
+			Error("Started Runner is already in use")
 		return
 	}

@@ -178,7 +179,7 @@ func monitorAllocationStartupDuration(startup time.Duration, runnerID string, en
 }

 func (m *NomadRunnerManager) onAllocationStopped(runnerID string) (alreadyRemoved bool) {
-	log.WithField("id", runnerID).Debug("Runner stopped")
+	log.WithField(dto.KeyRunnerID, runnerID).Debug("Runner stopped")

 	if nomad.IsEnvironmentTemplateID(runnerID) {
 		return false
--- a/internal/runner/nomad_runner.go
+++ b/internal/runner/nomad_runner.go
@@ -201,7 +201,7 @@ func (r *NomadJob) GetFileContent(
 	p.AddTag(monitoring.InfluxKeyRunnerID, r.ID())
 	environmentID, err := nomad.EnvironmentIDFromRunnerID(r.ID())
 	if err != nil {
-		log.WithContext(ctx).WithField("runnerID", r.ID()).WithError(err).Warn("can not parse environment id")
+		log.WithContext(ctx).WithError(err).Warn("can not parse environment id")
 	}
 	p.AddTag(monitoring.InfluxKeyEnvironmentID, environmentID.ToString())
 	defer contentLengthWriter.SendMonitoringData(p)
@@ -283,16 +283,16 @@ func (r *NomadJob) handleExitOrContextDone(ctx context.Context, cancelExecute co
 	// log.WithField("runner", r.id).Warn("Could not send SIGQUIT because nothing was written")
 	// }
 	if err != nil {
-		log.WithContext(ctx).WithField("runner", r.id).WithError(err).Warn("Could not send SIGQUIT due to error")
+		log.WithContext(ctx).WithError(err).Warn("Could not send SIGQUIT due to error")
 	}

 	select {
 	case <-exitInternal:
-		log.WithContext(ctx).WithField("runner", r.id).Debug("Execution terminated after SIGQUIT")
+		log.WithContext(ctx).Debug("Execution terminated after SIGQUIT")
 	case <-time.After(executionTimeoutGracePeriod):
-		log.WithContext(ctx).WithField("runner", r.id).Info("Execution did not quit after SIGQUIT")
+		log.WithContext(ctx).Info("Execution did not quit after SIGQUIT")
 		if err := r.Destroy(); err != nil {
-			log.WithContext(ctx).WithField("runner", r.id).Error("Error when destroying runner")
+			log.WithContext(ctx).Error("Error when destroying runner")
 		}
 	}
 }