Add Client Status to Nomad Allocation monitoring

Also add the Nomad Node name as additional debug information.
This commit is contained in:
Maximilian Paß
2023-04-23 13:18:56 +01:00
parent 8f89c14ea1
commit f377b1376c
2 changed files with 18 additions and 4 deletions

View File

@@ -45,6 +45,8 @@ type allocationData struct {
allocClientStatus string allocClientStatus string
jobID string jobID string
start time.Time start time.Time
// Just debugging information
allocNomadNode string
} }
// ExecutorAPI provides access to a container orchestration solution. // ExecutorAPI provides access to a container orchestration solution.
@@ -110,6 +112,8 @@ func NewExecutorAPI(nomadConfig *config.Nomad) (ExecutorAPI, error) {
allocations: storage.NewMonitoredLocalStorage[*allocationData](monitoring.MeasurementNomadAllocations, allocations: storage.NewMonitoredLocalStorage[*allocationData](monitoring.MeasurementNomadAllocations,
func(p *write.Point, object *allocationData, _ storage.EventType) { func(p *write.Point, object *allocationData, _ storage.EventType) {
p.AddTag(monitoring.InfluxKeyJobID, object.jobID) p.AddTag(monitoring.InfluxKeyJobID, object.jobID)
p.AddTag(monitoring.InfluxKeyClientStatus, object.allocClientStatus)
p.AddTag(monitoring.InfluxKeyNomadNode, object.allocNomadNode)
}, 0, nil), }, 0, nil),
} }
err := client.init(nomadConfig) err := client.init(nomadConfig)
@@ -247,8 +251,12 @@ func (a *APIClient) initializeAllocations(environmentID dto.EnvironmentID) {
continue continue
case stub.ClientStatus == structs.AllocClientStatusPending || stub.ClientStatus == structs.AllocClientStatusRunning: case stub.ClientStatus == structs.AllocClientStatusPending || stub.ClientStatus == structs.AllocClientStatusRunning:
log.WithField("jobID", stub.JobID).WithField("status", stub.ClientStatus).Debug("Recovered Allocation") log.WithField("jobID", stub.JobID).WithField("status", stub.ClientStatus).Debug("Recovered Allocation")
a.allocations.Add(stub.ID, a.allocations.Add(stub.ID, &allocationData{
&allocationData{allocClientStatus: stub.ClientStatus, start: time.Unix(0, stub.CreateTime), jobID: stub.JobID}) allocClientStatus: stub.ClientStatus,
jobID: stub.JobID,
start: time.Unix(0, stub.CreateTime),
allocNomadNode: stub.NodeName,
})
} }
} }
} }
@@ -363,8 +371,12 @@ func handlePendingAllocationEvent(alloc *nomadApi.Allocation,
callbacks.OnDeleted(alloc) callbacks.OnDeleted(alloc)
} }
// Store Pending Allocation - Allocation gets started, wait until it runs. // Store Pending Allocation - Allocation gets started, wait until it runs.
allocations.Add(alloc.ID, allocations.Add(alloc.ID, &allocationData{
&allocationData{allocClientStatus: structs.AllocClientStatusPending, start: time.Now(), jobID: alloc.JobID}) allocClientStatus: structs.AllocClientStatusPending,
jobID: alloc.JobID,
start: time.Now(),
allocNomadNode: alloc.NodeName,
})
} else { } else {
log.WithField("alloc", alloc).Warn("Other Desired Status") log.WithField("alloc", alloc).Warn("Other Desired Status")
} }

View File

@@ -37,6 +37,8 @@ const (
InfluxKeyRunnerID = "runner_id" InfluxKeyRunnerID = "runner_id"
InfluxKeyEnvironmentID = "environment_id" InfluxKeyEnvironmentID = "environment_id"
InfluxKeyJobID = "job_id" InfluxKeyJobID = "job_id"
InfluxKeyClientStatus = "client_status"
InfluxKeyNomadNode = "nomad_agent"
InfluxKeyActualContentLength = "actual_length" InfluxKeyActualContentLength = "actual_length"
InfluxKeyExpectedContentLength = "expected_length" InfluxKeyExpectedContentLength = "expected_length"
InfluxKeyDuration = "duration" InfluxKeyDuration = "duration"