From f377b1376c59a4f3f2a95258678e917e9fef69c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Pa=C3=9F?= <22845248+mpass99@users.noreply.github.com> Date: Sun, 23 Apr 2023 13:18:56 +0100 Subject: [PATCH] Add Client Status to Nomad Allocation monitoring Also add the Nomad Node name as additional debug information. --- internal/nomad/nomad.go | 20 ++++++++++++++++---- pkg/monitoring/influxdb2_middleware.go | 2 ++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/internal/nomad/nomad.go b/internal/nomad/nomad.go index 85abe56..a4b861a 100644 --- a/internal/nomad/nomad.go +++ b/internal/nomad/nomad.go @@ -45,6 +45,8 @@ type allocationData struct { allocClientStatus string jobID string start time.Time + // Just debugging information + allocNomadNode string } // ExecutorAPI provides access to a container orchestration solution. @@ -110,6 +112,8 @@ func NewExecutorAPI(nomadConfig *config.Nomad) (ExecutorAPI, error) { allocations: storage.NewMonitoredLocalStorage[*allocationData](monitoring.MeasurementNomadAllocations, func(p *write.Point, object *allocationData, _ storage.EventType) { p.AddTag(monitoring.InfluxKeyJobID, object.jobID) + p.AddTag(monitoring.InfluxKeyClientStatus, object.allocClientStatus) + p.AddTag(monitoring.InfluxKeyNomadNode, object.allocNomadNode) }, 0, nil), } err := client.init(nomadConfig) @@ -247,8 +251,12 @@ func (a *APIClient) initializeAllocations(environmentID dto.EnvironmentID) { continue case stub.ClientStatus == structs.AllocClientStatusPending || stub.ClientStatus == structs.AllocClientStatusRunning: log.WithField("jobID", stub.JobID).WithField("status", stub.ClientStatus).Debug("Recovered Allocation") - a.allocations.Add(stub.ID, - &allocationData{allocClientStatus: stub.ClientStatus, start: time.Unix(0, stub.CreateTime), jobID: stub.JobID}) + a.allocations.Add(stub.ID, &allocationData{ + allocClientStatus: stub.ClientStatus, + jobID: stub.JobID, + start: time.Unix(0, stub.CreateTime), + allocNomadNode: stub.NodeName, + }) } } } @@ -363,8 +371,12 @@ func handlePendingAllocationEvent(alloc *nomadApi.Allocation, callbacks.OnDeleted(alloc) } // Store Pending Allocation - Allocation gets started, wait until it runs. - allocations.Add(alloc.ID, - &allocationData{allocClientStatus: structs.AllocClientStatusPending, start: time.Now(), jobID: alloc.JobID}) + allocations.Add(alloc.ID, &allocationData{ + allocClientStatus: structs.AllocClientStatusPending, + jobID: alloc.JobID, + start: time.Now(), + allocNomadNode: alloc.NodeName, + }) } else { log.WithField("alloc", alloc).Warn("Other Desired Status") } diff --git a/pkg/monitoring/influxdb2_middleware.go b/pkg/monitoring/influxdb2_middleware.go index d128f5a..39d89d7 100644 --- a/pkg/monitoring/influxdb2_middleware.go +++ b/pkg/monitoring/influxdb2_middleware.go @@ -37,6 +37,8 @@ const ( InfluxKeyRunnerID = "runner_id" InfluxKeyEnvironmentID = "environment_id" InfluxKeyJobID = "job_id" + InfluxKeyClientStatus = "client_status" + InfluxKeyNomadNode = "nomad_agent" InfluxKeyActualContentLength = "actual_length" InfluxKeyExpectedContentLength = "expected_length" InfluxKeyDuration = "duration"