Monitor Nomad allocation startup duration.
This commit is contained in:
@@ -7,7 +7,6 @@ import (
|
||||
"github.com/openHPI/poseidon/pkg/dto"
|
||||
"github.com/openHPI/poseidon/pkg/monitoring"
|
||||
"github.com/openHPI/poseidon/pkg/storage"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
var ErrNullObject = errors.New("functionality not available for the null object")
|
||||
@@ -33,7 +32,7 @@ func NewAbstractManager() *AbstractManager {
|
||||
// MonitorRunnersEnvironmentID tags the monitoring Point p with the environment id of the runner e.
// The tag is only added when the point does not describe a deletion (isDeletion is false)
// and e is not nil; otherwise p is left unchanged.
|
||||
func MonitorRunnersEnvironmentID(p *write.Point, e Runner, isDeletion bool) {
|
||||
if !isDeletion && e != nil {
|
||||
p.AddTag(monitoring.InfluxKeyEnvironmentID, strconv.Itoa(int(e.Environment())))
|
||||
p.AddTag(monitoring.InfluxKeyEnvironmentID, e.Environment().ToString())
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -5,9 +5,11 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
nomadApi "github.com/hashicorp/nomad/api"
|
||||
influxdb2 "github.com/influxdata/influxdb-client-go/v2"
|
||||
"github.com/openHPI/poseidon/internal/nomad"
|
||||
"github.com/openHPI/poseidon/pkg/dto"
|
||||
"github.com/openHPI/poseidon/pkg/logging"
|
||||
"github.com/openHPI/poseidon/pkg/monitoring"
|
||||
"github.com/sirupsen/logrus"
|
||||
"strconv"
|
||||
"time"
|
||||
@@ -118,15 +120,16 @@ func (m *NomadRunnerManager) loadSingleJob(job *nomadApi.Job, environmentLogger
|
||||
// keepRunnersSynced repeatedly calls the API client's WatchEventStream, passing
// m.onAllocationAdded and m.onAllocationStopped as the allocation callbacks,
// for as long as ctx.Err() is nil. Every time WatchEventStream returns, the retry
// counter is incremented, the returned error is logged together with that counter,
// and the loop waits one second before the next attempt.
// NOTE(review): the one-second wait does not select on ctx.Done(), so after a
// cancellation the function may take up to a second longer to return - confirm this is acceptable.
func (m *NomadRunnerManager) keepRunnersSynced(ctx context.Context) {
|
||||
retries := 0
|
||||
for ctx.Err() == nil {
|
||||
err := m.apiClient.WatchEventStream(ctx, m.onAllocationAdded, m.onAllocationStopped)
|
||||
err := m.apiClient.WatchEventStream(ctx,
|
||||
&nomad.AllocationProcessoring{OnNew: m.onAllocationAdded, OnDeleted: m.onAllocationStopped})
|
||||
retries += 1
|
||||
log.WithError(err).Errorf("Stopped updating the runners! Retry %v", retries)
|
||||
<-time.After(time.Second)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation) {
|
||||
log.WithField("id", alloc.JobID).Debug("Runner started")
|
||||
func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation, startup time.Duration) {
|
||||
log.WithField("id", alloc.JobID).WithField("startupDuration", startup).Debug("Runner started")
|
||||
|
||||
if nomad.IsEnvironmentTemplateID(alloc.JobID) {
|
||||
return
|
||||
@@ -145,9 +148,18 @@ func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation) {
|
||||
mappedPorts = alloc.AllocatedResources.Shared.Ports
|
||||
}
|
||||
environment.AddRunner(NewNomadJob(alloc.JobID, mappedPorts, m.apiClient, m.Return))
|
||||
monitorAllocationStartupDuration(startup, alloc.JobID, environmentID)
|
||||
}
|
||||
}
|
||||
|
||||
// monitorAllocationStartupDuration writes one InfluxDB point for the measurement
// monitoring.MeasurementIdleRunnerNomad. The point carries startup as its duration field
// and is tagged with the environment id and the runner id before it is handed to
// monitoring.WriteInfluxPoint.
// NOTE(review): startup is passed to AddField as a time.Duration value; confirm how the
// InfluxDB client serializes that type (an explicit unit such as startup.Nanoseconds()
// may be preferable for numeric queries).
func monitorAllocationStartupDuration(startup time.Duration, runnerID string, environmentID dto.EnvironmentID) {
|
||||
p := influxdb2.NewPointWithMeasurement(monitoring.MeasurementIdleRunnerNomad)
|
||||
p.AddField(monitoring.InfluxKeyDuration, startup)
|
||||
p.AddTag(monitoring.InfluxKeyEnvironmentID, environmentID.ToString())
|
||||
p.AddTag(monitoring.InfluxKeyRunnerID, runnerID)
|
||||
monitoring.WriteInfluxPoint(p)
|
||||
}
|
||||
|
||||
func (m *NomadRunnerManager) onAllocationStopped(alloc *nomadApi.Allocation) {
|
||||
log.WithField("id", alloc.JobID).Debug("Runner stopped")
|
||||
|
||||
|
@@ -227,9 +227,9 @@ func (s *ManagerTestSuite) TestUpdateRunnersAddsIdleRunner() {
|
||||
|
||||
modifyMockedCall(s.apiMock, "WatchEventStream", func(call *mock.Call) {
|
||||
call.Run(func(args mock.Arguments) {
|
||||
onCreate, ok := args.Get(1).(nomad.AllocationProcessor)
|
||||
callbacks, ok := args.Get(1).(*nomad.AllocationProcessoring)
|
||||
s.Require().True(ok)
|
||||
onCreate(allocation)
|
||||
callbacks.OnNew(allocation, 0)
|
||||
call.ReturnArguments = mock.Arguments{nil}
|
||||
})
|
||||
})
|
||||
@@ -255,9 +255,9 @@ func (s *ManagerTestSuite) TestUpdateRunnersRemovesIdleAndUsedRunner() {
|
||||
|
||||
modifyMockedCall(s.apiMock, "WatchEventStream", func(call *mock.Call) {
|
||||
call.Run(func(args mock.Arguments) {
|
||||
onDelete, ok := args.Get(2).(nomad.AllocationProcessor)
|
||||
callbacks, ok := args.Get(1).(*nomad.AllocationProcessoring)
|
||||
s.Require().True(ok)
|
||||
onDelete(allocation)
|
||||
callbacks.OnDeleted(allocation)
|
||||
call.ReturnArguments = mock.Arguments{nil}
|
||||
})
|
||||
})
|
||||
@@ -288,7 +288,7 @@ func (s *ManagerTestSuite) TestOnAllocationAdded() {
|
||||
mockIdleRunners(environment.(*ExecutionEnvironmentMock))
|
||||
|
||||
alloc := &nomadApi.Allocation{JobID: nomad.TemplateJobID(tests.DefaultEnvironmentIDAsInteger)}
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc)
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc, 0)
|
||||
|
||||
_, ok = environment.Sample()
|
||||
s.False(ok)
|
||||
@@ -296,7 +296,7 @@ func (s *ManagerTestSuite) TestOnAllocationAdded() {
|
||||
s.Run("does not panic when environment id cannot be parsed", func() {
|
||||
alloc := &nomadApi.Allocation{JobID: ""}
|
||||
s.NotPanics(func() {
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc)
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc, 0)
|
||||
})
|
||||
})
|
||||
s.Run("does not panic when environment does not exist", func() {
|
||||
@@ -306,7 +306,7 @@ func (s *ManagerTestSuite) TestOnAllocationAdded() {
|
||||
|
||||
alloc := &nomadApi.Allocation{JobID: nomad.RunnerJobID(nonExistentEnvironment, "1-1-1-1")}
|
||||
s.NotPanics(func() {
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc)
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc, 0)
|
||||
})
|
||||
})
|
||||
s.Run("adds correct job", func() {
|
||||
@@ -319,7 +319,7 @@ func (s *ManagerTestSuite) TestOnAllocationAdded() {
|
||||
JobID: tests.DefaultRunnerID,
|
||||
AllocatedResources: nil,
|
||||
}
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc)
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc, 0)
|
||||
|
||||
runner, ok := environment.Sample()
|
||||
s.True(ok)
|
||||
@@ -339,7 +339,7 @@ func (s *ManagerTestSuite) TestOnAllocationAdded() {
|
||||
Shared: nomadApi.AllocatedSharedResources{Ports: tests.DefaultPortMappings},
|
||||
},
|
||||
}
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc)
|
||||
s.nomadRunnerManager.onAllocationAdded(alloc, 0)
|
||||
|
||||
runner, ok := environment.Sample()
|
||||
s.True(ok)
|
||||
|
@@ -7,7 +7,6 @@ import (
|
||||
"github.com/openHPI/poseidon/pkg/monitoring"
|
||||
"github.com/openHPI/poseidon/pkg/storage"
|
||||
"io"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type ExitInfo struct {
|
||||
@@ -69,7 +68,7 @@ func monitorExecutionsRunnerID(env dto.EnvironmentID, runnerID string) storage.W
|
||||
return func(p *write.Point, e *dto.ExecutionRequest, isDeletion bool) {
|
||||
if !isDeletion && e != nil {
|
||||
p.AddTag(monitoring.InfluxKeyRunnerID, runnerID)
|
||||
p.AddTag(monitoring.InfluxKeyEnvironmentID, strconv.Itoa(int(env)))
|
||||
p.AddTag(monitoring.InfluxKeyEnvironmentID, env.ToString())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user