#110 Refactor influxdb monitoring

to use it as a singleton.
This makes it possible to monitor processes that are independent of an incoming request.
Author: Maximilian Paß
Date:   2022-06-29 20:05:19 +02:00
Parent: eafc01e69a
Commit: 498e8f5ff5
19 changed files with 174 additions and 133 deletions
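
In short: the InfluxDB write API is no longer threaded through initServer, NewRouter, and the middleware. Instead, pkg/monitoring owns a package-level client that is set up once via InitializeInfluxDB and used by WriteInfluxPoint. A minimal sketch of what that enables, based only on the functions introduced in this commit; the background measurement name is made up for illustration:

    package main

    import (
        influxdb2 "github.com/influxdata/influxdb-client-go/v2"

        "github.com/openHPI/poseidon/internal/config"
        "github.com/openHPI/poseidon/pkg/monitoring"
    )

    func main() {
        // Set up the singleton once; a no-op cancel function is returned when no InfluxDB URL is configured.
        cancel := monitoring.InitializeInfluxDB(&config.Config.InfluxDB)
        defer cancel()

        // Any code path can now record a measurement without access to an *http.Request
        // or to the write API. The measurement name "poseidon_example_job" is hypothetical.
        p := influxdb2.NewPointWithMeasurement("poseidon_example_job")
        p.AddField("count", 1)
        monitoring.WriteInfluxPoint(p) // dropped silently if InfluxDB is not configured
    }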

View File

@@ -4,14 +4,13 @@ import (
 	"context"
 	"errors"
 	"github.com/getsentry/sentry-go"
-	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
-	influxdb2API "github.com/influxdata/influxdb-client-go/v2/api"
 	"github.com/openHPI/poseidon/internal/api"
 	"github.com/openHPI/poseidon/internal/config"
 	"github.com/openHPI/poseidon/internal/environment"
 	"github.com/openHPI/poseidon/internal/nomad"
 	"github.com/openHPI/poseidon/internal/runner"
 	"github.com/openHPI/poseidon/pkg/logging"
+	"github.com/openHPI/poseidon/pkg/monitoring"
 	"net/http"
 	"os"
 	"os/signal"
@@ -37,20 +36,6 @@ func shutdownSentry() {
 	}
 }

-func initInfluxDB(db *config.InfluxDB) (writeAPI influxdb2API.WriteAPI, cancel func()) {
-	if db.URL == "" {
-		return nil, func() {}
-	}
-
-	client := influxdb2.NewClient(db.URL, db.Token)
-	writeAPI = client.WriteAPI(db.Organization, db.Bucket)
-	cancel = func() {
-		writeAPI.Flush()
-		client.Close()
-	}
-	return writeAPI, cancel
-}
-
 func runServer(server *http.Server) {
 	log.WithField("address", server.Addr).Info("Starting server")
 	var err error
@@ -111,7 +96,7 @@ func createAWSManager() (runnerManager runner.Manager, environmentManager enviro
 }

 // initServer builds the http server and configures it with the chain of responsibility for multiple managers.
-func initServer(influxClient influxdb2API.WriteAPI) *http.Server {
+func initServer() *http.Server {
 	runnerManager, environmentManager := createManagerHandler(createNomadManager, config.Config.Nomad.Enabled,
 		nil, nil)
 	runnerManager, environmentManager = createManagerHandler(createAWSManager, config.Config.AWS.Enabled,
@@ -121,7 +106,7 @@ func initServer(influxClient influxdb2API.WriteAPI) *http.Server {
 		Addr:         config.Config.Server.URL().Host,
 		ReadTimeout:  time.Second * 15,
 		IdleTimeout:  time.Second * 60,
-		Handler:      api.NewRouter(runnerManager, environmentManager, influxClient),
+		Handler:      api.NewRouter(runnerManager, environmentManager),
 	}
 }
@@ -149,10 +134,10 @@ func main() {
 	initSentry(&config.Config.Sentry)
 	defer shutdownSentry()

-	influxDB, cancel := initInfluxDB(&config.Config.InfluxDB)
+	cancel := monitoring.InitializeInfluxDB(&config.Config.InfluxDB)
 	defer cancel()

-	server := initServer(influxDB)
+	server := initServer()
 	go runServer(server)
 	shutdownOnOSSignal(server)
 }

View File

@@ -2,7 +2,6 @@ package api

 import (
 	"github.com/gorilla/mux"
-	influxdb2API "github.com/influxdata/influxdb-client-go/v2/api"
 	"github.com/openHPI/poseidon/internal/api/auth"
 	"github.com/openHPI/poseidon/internal/config"
 	"github.com/openHPI/poseidon/internal/environment"
@@ -29,14 +28,13 @@ const (
 // always returns a router for the newest version of our API. We
 // use gorilla/mux because it is more convenient than net/http, e.g.
 // when extracting path parameters.
-func NewRouter(runnerManager runner.Manager, environmentManager environment.ManagerHandler,
-	influxClient influxdb2API.WriteAPI) *mux.Router {
+func NewRouter(runnerManager runner.Manager, environmentManager environment.ManagerHandler) *mux.Router {
 	router := mux.NewRouter()
 	// this can later be restricted to a specific host with
 	// `router.Host(...)` and to HTTPS with `router.Schemes("https")`
 	configureV1Router(router, runnerManager, environmentManager)
 	router.Use(logging.HTTPLoggingMiddleware)
-	router.Use(monitoring.InfluxDB2Middleware(influxClient, environmentManager))
+	router.Use(monitoring.InfluxDB2Middleware)
 	return router
 }

View File

@@ -31,7 +31,7 @@ func TestEnvironmentControllerTestSuite(t *testing.T) {
 func (s *EnvironmentControllerTestSuite) SetupTest() {
 	s.manager = &environment.ManagerHandlerMock{}
-	s.router = NewRouter(nil, s.manager, nil)
+	s.router = NewRouter(nil, s.manager)
 }

 func (s *EnvironmentControllerTestSuite) TestList() {

View File

@@ -68,7 +68,7 @@ func (r *RunnerController) provide(writer http.ResponseWriter, request *http.Req
 		}
 		return
 	}
-	monitoring.AddRunnerMonitoringData(request, nextRunner)
+	monitoring.AddRunnerMonitoringData(request, nextRunner.ID(), nextRunner.Environment())
 	sendJSON(writer, &dto.RunnerResponse{ID: nextRunner.ID(), MappedPorts: nextRunner.MappedPorts()}, http.StatusOK)
 }
@@ -82,7 +82,7 @@ func (r *RunnerController) updateFileSystem(writer http.ResponseWriter, request
 	}

 	targetRunner, _ := runner.FromContext(request.Context())
-	monitoring.AddRunnerMonitoringData(request, targetRunner)
+	monitoring.AddRunnerMonitoringData(request, targetRunner.ID(), targetRunner.Environment())
 	if err := targetRunner.UpdateFileSystem(fileCopyRequest); err != nil {
 		log.WithError(err).Error("Could not perform the requested updateFileSystem.")
 		writeInternalServerError(writer, err, dto.ErrorUnknown)
@@ -108,7 +108,7 @@ func (r *RunnerController) execute(writer http.ResponseWriter, request *http.Req
 		scheme = "ws"
 	}
 	targetRunner, _ := runner.FromContext(request.Context())
-	monitoring.AddRunnerMonitoringData(request, targetRunner)
+	monitoring.AddRunnerMonitoringData(request, targetRunner.ID(), targetRunner.Environment())

 	path, err := r.runnerRouter.Get(WebsocketPath).URL(RunnerIDKey, targetRunner.ID())
 	if err != nil {
@@ -160,7 +160,7 @@ func (r *RunnerController) findRunnerMiddleware(next http.Handler) http.Handler
 // It destroys the given runner on the executor and removes it from the used runners list.
 func (r *RunnerController) delete(writer http.ResponseWriter, request *http.Request) {
 	targetRunner, _ := runner.FromContext(request.Context())
-	monitoring.AddRunnerMonitoringData(request, targetRunner)
+	monitoring.AddRunnerMonitoringData(request, targetRunner.ID(), targetRunner.Environment())

 	err := r.manager.Return(targetRunner)
 	if err != nil {

View File

@@ -107,7 +107,7 @@ type RunnerRouteTestSuite struct {
 func (s *RunnerRouteTestSuite) SetupTest() {
 	s.runnerManager = &runner.ManagerMock{}
-	s.router = NewRouter(s.runnerManager, nil, nil)
+	s.router = NewRouter(s.runnerManager, nil)
 	s.runner = runner.NewNomadJob("some-id", nil, nil, nil)
 	s.executionID = "execution"
 	s.runner.StoreExecution(s.executionID, &dto.ExecutionRequest{})

View File

@@ -76,7 +76,7 @@ func (wp *webSocketProxy) waitForExit(exit <-chan runner.ExitInfo, cancelExecuti
 // connectToRunner is the endpoint for websocket connections.
 func (r *RunnerController) connectToRunner(writer http.ResponseWriter, request *http.Request) {
 	targetRunner, _ := runner.FromContext(request.Context())
-	monitoring.AddRunnerMonitoringData(request, targetRunner)
+	monitoring.AddRunnerMonitoringData(request, targetRunner.ID(), targetRunner.Environment())

 	executionID := request.URL.Query().Get(ExecutionIDKey)
 	if !targetRunner.ExecutionExists(executionID) {

View File

@@ -51,7 +51,7 @@ func (s *WebSocketTestSuite) SetupTest() {
 	runnerManager := &runner.ManagerMock{}
 	runnerManager.On("Get", s.runner.ID()).Return(s.runner, nil)
-	s.router = NewRouter(runnerManager, nil, nil)
+	s.router = NewRouter(runnerManager, nil)
 	s.server = httptest.NewServer(s.router)
 }
@@ -257,7 +257,7 @@ func TestWebsocketTLS(t *testing.T) {
 	runnerManager := &runner.ManagerMock{}
 	runnerManager.On("Get", r.ID()).Return(r, nil)
-	router := NewRouter(runnerManager, nil, nil)
+	router := NewRouter(runnerManager, nil)
 	server, err := helpers.StartTLSServer(t, router)
 	require.NoError(t, err)
@@ -327,7 +327,7 @@ func newRunnerWithNotMockedRunnerManager(t *testing.T, apiMock *nomad.ExecutorAP
 		call.ReturnArguments = mock.Arguments{nil}
 	})
 	runnerManager := runner.NewNomadRunnerManager(apiMock, context.Background())
-	router := NewRouter(runnerManager, nil, nil)
+	router := NewRouter(runnerManager, nil)
 	server := httptest.NewServer(router)
 	runnerID := tests.DefaultRunnerID

View File

@@ -87,7 +87,7 @@ func (a *AWSEnvironment) CPULimit() uint {
 func (a *AWSEnvironment) SetCPULimit(_ uint) {}

 func (a *AWSEnvironment) MemoryLimit() uint {
-	panic("not supported")
+	return 0
 }

 func (a *AWSEnvironment) SetMemoryLimit(_ uint) {
@@ -95,7 +95,7 @@ func (a *AWSEnvironment) SetMemoryLimit(_ uint) {
 }

 func (a *AWSEnvironment) NetworkAccess() (enabled bool, mappedPorts []uint16) {
-	panic("not supported")
+	return false, nil
 }

 func (a *AWSEnvironment) SetNetworkAccess(_ bool, _ []uint16) {

View File

@@ -11,6 +11,7 @@ import (
 	"github.com/openHPI/poseidon/internal/nomad"
 	"github.com/openHPI/poseidon/internal/runner"
 	"github.com/openHPI/poseidon/pkg/dto"
+	"github.com/openHPI/poseidon/pkg/monitoring"
 	"github.com/openHPI/poseidon/pkg/storage"
 	"strconv"
 	"sync"
@@ -35,7 +36,8 @@ func NewNomadEnvironment(apiClient nomad.ExecutorAPI, jobHCL string) (*NomadEnvi
 		return nil, fmt.Errorf("error parsing Nomad job: %w", err)
 	}

-	return &NomadEnvironment{apiClient, jobHCL, job, storage.NewLocalStorage[runner.Runner]()}, nil
+	return &NomadEnvironment{apiClient, jobHCL, job,
+		storage.NewMonitoredLocalStorage[runner.Runner](monitoring.MeasurementIdleRunnerNomad, nil)}, nil
 }

 func NewNomadEnvironmentFromRequest(
@@ -80,6 +82,7 @@ func (n *NomadEnvironment) PrewarmingPoolSize() uint {
 }

 func (n *NomadEnvironment) SetPrewarmingPoolSize(count uint) {
+	monitoring.ChangedPrewarmingPoolSize(n.ID(), count)
 	taskGroup := nomad.FindAndValidateConfigTaskGroup(n.job)
 	if taskGroup.Meta == nil {

View File

@@ -8,6 +8,7 @@ import (
 	"github.com/openHPI/poseidon/internal/runner"
 	"github.com/openHPI/poseidon/pkg/dto"
 	"github.com/openHPI/poseidon/pkg/logging"
+	"github.com/openHPI/poseidon/pkg/monitoring"
 	"github.com/openHPI/poseidon/pkg/storage"
 	"os"
 )
@@ -142,7 +143,7 @@ func (m *NomadEnvironmentManager) Load() error {
 			apiClient:   m.api,
 			jobHCL:      templateEnvironmentJobHCL,
 			job:         job,
-			idleRunners: storage.NewLocalStorage[runner.Runner](),
+			idleRunners: storage.NewMonitoredLocalStorage[runner.Runner](monitoring.MeasurementIdleRunnerNomad, nil),
 		}
 		m.runnerManager.StoreEnvironment(environment)
 		jobLogger.Info("Successfully recovered environment")
@@ -181,7 +182,7 @@ func fetchEnvironment(id dto.EnvironmentID, apiClient nomad.ExecutorAPI) (runner
 			apiClient:   apiClient,
 			jobHCL:      templateEnvironmentJobHCL,
 			job:         job,
-			idleRunners: storage.NewLocalStorage[runner.Runner](),
+			idleRunners: storage.NewMonitoredLocalStorage[runner.Runner](monitoring.MeasurementIdleRunnerNomad, nil),
 		}
 	}
 }
} }

View File

@@ -4,6 +4,7 @@ import (
 	"errors"
 	"fmt"
 	"github.com/openHPI/poseidon/pkg/dto"
+	"github.com/openHPI/poseidon/pkg/monitoring"
 	"github.com/openHPI/poseidon/pkg/storage"
 )
@@ -21,8 +22,9 @@ type AbstractManager struct {
 // NewAbstractManager creates a new abstract runner manager that keeps track of all runners of one kind.
 func NewAbstractManager() *AbstractManager {
 	return &AbstractManager{
-		environments: storage.NewLocalStorage[ExecutionEnvironment](),
-		usedRunners:  storage.NewLocalStorage[Runner](),
+		environments: storage.NewMonitoredLocalStorage[ExecutionEnvironment](
+			monitoring.MeasurementEnvironments, monitorEnvironmentData),
+		usedRunners: storage.NewMonitoredLocalStorage[Runner](monitoring.MeasurementUsedRunner, nil),
 	}
 }

View File

@@ -14,8 +14,7 @@ func TestAWSRunnerManager_EnvironmentAccessor(t *testing.T) {
 	environments := m.ListEnvironments()
 	assert.Empty(t, environments)

-	environment := &ExecutionEnvironmentMock{}
-	environment.On("ID").Return(dto.EnvironmentID(tests.DefaultEnvironmentIDAsInteger))
+	environment := createBasicEnvironmentMock(defaultEnvironmentID)
 	m.StoreEnvironment(environment)

 	environments = m.ListEnvironments()
@@ -32,8 +31,7 @@ func TestAWSRunnerManager_EnvironmentAccessor(t *testing.T) {
 func TestAWSRunnerManager_Claim(t *testing.T) {
 	m := NewAWSRunnerManager()

-	environment := &ExecutionEnvironmentMock{}
-	environment.On("ID").Return(dto.EnvironmentID(tests.DefaultEnvironmentIDAsInteger))
+	environment := createBasicEnvironmentMock(defaultEnvironmentID)
 	r, err := NewAWSFunctionWorkload(environment, nil)
 	assert.NoError(t, err)
 	environment.On("Sample").Return(r, true)
@@ -59,8 +57,7 @@ func TestAWSRunnerManager_Claim(t *testing.T) {
 func TestAWSRunnerManager_Return(t *testing.T) {
 	m := NewAWSRunnerManager()

-	environment := &ExecutionEnvironmentMock{}
-	environment.On("ID").Return(dto.EnvironmentID(tests.DefaultEnvironmentIDAsInteger))
+	environment := createBasicEnvironmentMock(defaultEnvironmentID)
 	m.StoreEnvironment(environment)
 	r, err := NewAWSFunctionWorkload(environment, nil)
 	assert.NoError(t, err)
@@ -85,3 +82,13 @@ func TestAWSRunnerManager_Return(t *testing.T) {
 		nextHandler.AssertCalled(t, "Return", nonAWSRunner)
 	})
 }
+
+func createBasicEnvironmentMock(id dto.EnvironmentID) *ExecutionEnvironmentMock {
+	environment := &ExecutionEnvironmentMock{}
+	environment.On("ID").Return(id)
+	environment.On("Image").Return("")
+	environment.On("CPULimit").Return(uint(0))
+	environment.On("MemoryLimit").Return(uint(0))
+	environment.On("NetworkAccess").Return(false, nil)
+	return environment
+}

View File

@@ -10,6 +10,7 @@ import (
 	"github.com/openHPI/poseidon/internal/config"
 	"github.com/openHPI/poseidon/pkg/dto"
 	"github.com/openHPI/poseidon/pkg/execution"
+	"github.com/openHPI/poseidon/pkg/monitoring"
 	"github.com/openHPI/poseidon/pkg/storage"
 	"io"
 )
@@ -47,7 +48,7 @@ func NewAWSFunctionWorkload(
 	workload := &AWSFunctionWorkload{
 		id:                newUUID.String(),
 		fs:                make(map[dto.FilePath][]byte),
-		executions:        storage.NewLocalStorage[*dto.ExecutionRequest](),
+		executions:        storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](monitoring.MeasurementExecutionsAWS, nil),
 		runningExecutions: make(map[execution.ID]context.CancelFunc),
 		onDestroy:         onDestroy,
 		environment:       environment,

View File

@@ -2,7 +2,9 @@ package runner

 import (
 	"encoding/json"
+	"github.com/influxdata/influxdb-client-go/v2/api/write"
 	"github.com/openHPI/poseidon/pkg/dto"
+	"strconv"
 )

 // ExecutionEnvironment are groups of runner that share the configuration stored in the environment.
@@ -47,3 +49,14 @@ type ExecutionEnvironment interface {
 	// IdleRunnerCount returns the number of idle runners of the environment.
 	IdleRunnerCount() uint
 }
+
+// monitorEnvironmentData passes the configuration of the environment e into the monitoring Point p.
+func monitorEnvironmentData(p *write.Point, e ExecutionEnvironment, isDeletion bool) {
+	if !isDeletion && e != nil {
+		p.AddTag("image", e.Image())
+		p.AddTag("cpu_limit", strconv.Itoa(int(e.CPULimit())))
+		p.AddTag("memory_limit", strconv.Itoa(int(e.MemoryLimit())))
+		hasNetworkAccess, _ := e.NetworkAccess()
+		p.AddTag("network_access", strconv.FormatBool(hasNetworkAccess))
+	}
+}

View File

@@ -36,8 +36,8 @@ func (s *ManagerTestSuite) SetupTest() {
 	s.nomadRunnerManager = NewNomadRunnerManager(s.apiMock, ctx)

 	s.exerciseRunner = NewRunner(tests.DefaultRunnerID, s.nomadRunnerManager)
-	s.exerciseEnvironment = &ExecutionEnvironmentMock{}
-	s.setDefaultEnvironment()
+	s.exerciseEnvironment = createBasicEnvironmentMock(defaultEnvironmentID)
+	s.nomadRunnerManager.StoreEnvironment(s.exerciseEnvironment)
 }

 func mockRunnerQueries(apiMock *nomad.ExecutorAPIMock, returnedRunnerIds []string) {
@@ -81,18 +81,12 @@ func mockIdleRunners(environmentMock *ExecutionEnvironmentMock) {
 	})
 }

-func (s *ManagerTestSuite) setDefaultEnvironment() {
-	s.exerciseEnvironment.On("ID").Return(defaultEnvironmentID)
-	s.nomadRunnerManager.StoreEnvironment(s.exerciseEnvironment)
-}
-
 func (s *ManagerTestSuite) waitForRunnerRefresh() {
 	<-time.After(100 * time.Millisecond)
 }

 func (s *ManagerTestSuite) TestSetEnvironmentAddsNewEnvironment() {
-	anotherEnvironment := &ExecutionEnvironmentMock{}
-	anotherEnvironment.On("ID").Return(anotherEnvironmentID)
+	anotherEnvironment := createBasicEnvironmentMock(anotherEnvironmentID)
 	s.nomadRunnerManager.StoreEnvironment(anotherEnvironment)

 	job, ok := s.nomadRunnerManager.environments.Get(anotherEnvironmentID.ToString())

View File

@@ -11,18 +11,16 @@ import (
 	nomadApi "github.com/hashicorp/nomad/api"
 	"github.com/openHPI/poseidon/internal/nomad"
 	"github.com/openHPI/poseidon/pkg/dto"
+	"github.com/openHPI/poseidon/pkg/monitoring"
 	"github.com/openHPI/poseidon/pkg/storage"
 	"io"
 	"strings"
 	"time"
 )

-// ContextKey is the type for keys in a request context.
-type ContextKey string
-
 const (
 	// runnerContextKey is the key used to store runners in context.Context.
-	runnerContextKey ContextKey = "runner"
+	runnerContextKey dto.ContextKey = "runner"
 	// SIGQUIT is the character that causes a tty to send the SIGQUIT signal to the controlled process.
 	SIGQUIT = 0x1c
 	// executionTimeoutGracePeriod is the time to wait after sending a SIGQUIT signal to a timed out execution.
@@ -55,7 +53,7 @@ func NewNomadJob(id string, portMappings []nomadApi.PortMapping,
 		id:           id,
 		portMappings: portMappings,
 		api:          apiClient,
-		executions:   storage.NewLocalStorage[*dto.ExecutionRequest](),
+		executions:   storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](monitoring.MeasurementExecutionsNomad, nil),
 		onDestroy:    onDestroy,
 	}
 	job.InactivityTimer = NewInactivityTimer(job, onDestroy)

View File

@@ -136,6 +136,9 @@ func (f File) ByteContent() []byte {
 	}
 }

+// ContextKey is the type for keys in a request context that is used for passing data to the next handler.
+type ContextKey string
+
 // WebSocketMessageType is the type for the messages from Poseidon to the client.
 type WebSocketMessageType string

View File

@@ -8,69 +8,77 @@ import (
 	influxdb2API "github.com/influxdata/influxdb-client-go/v2/api"
 	"github.com/influxdata/influxdb-client-go/v2/api/write"
 	"github.com/openHPI/poseidon/internal/config"
-	"github.com/openHPI/poseidon/internal/environment"
-	"github.com/openHPI/poseidon/internal/runner"
 	"github.com/openHPI/poseidon/pkg/dto"
 	"github.com/openHPI/poseidon/pkg/logging"
 	"io"
 	"net/http"
-	"reflect"
 	"strconv"
 	"time"
 )

-var log = logging.GetLogger("monitoring")
-
 const (
-	// influxdbContextKey is a key to reference the influxdb data point in the request context.
-	influxdbContextKey runner.ContextKey = "influxdb data point"
-	// influxdbMeasurementPrefix allows easier filtering in influxdb.
-	influxdbMeasurementPrefix = "poseidon_"
+	// influxdbContextKey is a key (runner.ContextKey) to reference the influxdb data point in the request context.
+	influxdbContextKey dto.ContextKey = "influxdb data point"
+	// measurementPrefix allows easier filtering in influxdb.
+	measurementPrefix   = "poseidon_"
+	measurementPoolSize = measurementPrefix + "poolsize"
+
+	MeasurementIdleRunnerNomad = measurementPrefix + "nomad_idle_runners"
+	MeasurementExecutionsAWS   = measurementPrefix + "aws_executions"
+	MeasurementExecutionsNomad = measurementPrefix + "nomad_executions"
+	MeasurementEnvironments    = measurementPrefix + "environments"
+	MeasurementUsedRunner      = measurementPrefix + "used_runners"

 	// The keys for the monitored tags and fields.
 	influxKeyRunnerID                      = "runner_id"
 	influxKeyEnvironmentID                 = "environment_id"
-	influxKeyEnvironmentType               = "environment_type"
-	influxKeyEnvironmentIdleRunner         = "idle_runner"
 	influxKeyEnvironmentPrewarmingPoolSize = "prewarming_pool_size"
 	influxKeyRequestSize                   = "request_size"
 )

-// InfluxDB2Middleware is a middleware to send events to an influx database.
-func InfluxDB2Middleware(influxClient influxdb2API.WriteAPI, manager environment.Manager) mux.MiddlewareFunc {
-	return func(next http.Handler) http.Handler {
-		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			route := mux.CurrentRoute(r).GetName()
-			p := influxdb2.NewPointWithMeasurement(influxdbMeasurementPrefix + route)
-			p.AddTag("stage", config.Config.InfluxDB.Stage)
-			start := time.Now().UTC()
-			p.SetTime(time.Now())
-
-			ctx := context.WithValue(r.Context(), influxdbContextKey, p)
-			requestWithPoint := r.WithContext(ctx)
-			lrw := logging.NewLoggingResponseWriter(w)
-			next.ServeHTTP(lrw, requestWithPoint)
-
-			p.AddField("duration", time.Now().UTC().Sub(start).Nanoseconds())
-			p.AddTag("status", strconv.Itoa(lrw.StatusCode))
-			environmentID, err := strconv.Atoi(getEnvironmentID(p))
-			if err == nil && manager != nil {
-				addEnvironmentData(p, manager, dto.EnvironmentID(environmentID))
-			}
-			if influxClient != nil {
-				influxClient.WritePoint(p)
-			}
-		})
-	}
-}
+var (
+	log          = logging.GetLogger("monitoring")
+	influxClient influxdb2API.WriteAPI
+)
+
+func InitializeInfluxDB(db *config.InfluxDB) (cancel func()) {
+	if db.URL == "" {
+		return func() {}
+	}
+
+	client := influxdb2.NewClient(db.URL, db.Token)
+	influxClient = client.WriteAPI(db.Organization, db.Bucket)
+	cancel = func() {
+		influxClient.Flush()
+		client.Close()
+	}
+	return cancel
+}
+
+// InfluxDB2Middleware is a middleware to send events to an influx database.
+func InfluxDB2Middleware(next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		route := mux.CurrentRoute(r).GetName()
+		p := influxdb2.NewPointWithMeasurement(measurementPrefix + route)
+		start := time.Now().UTC()
+		p.SetTime(time.Now())
+
+		ctx := context.WithValue(r.Context(), influxdbContextKey, p)
+		requestWithPoint := r.WithContext(ctx)
+		lrw := logging.NewLoggingResponseWriter(w)
+		next.ServeHTTP(lrw, requestWithPoint)
+
+		p.AddField("duration", time.Now().UTC().Sub(start).Nanoseconds())
+		p.AddTag("status", strconv.Itoa(lrw.StatusCode))
+		WriteInfluxPoint(p)
+	})
+}

 // AddRunnerMonitoringData adds the data of the runner we want to monitor.
-func AddRunnerMonitoringData(request *http.Request, r runner.Runner) {
-	addRunnerID(request, r.ID())
-	addEnvironmentID(request, r.Environment())
+func AddRunnerMonitoringData(request *http.Request, runnerID string, environmentID dto.EnvironmentID) {
+	addRunnerID(request, runnerID)
+	addEnvironmentID(request, environmentID)
 }

 // addRunnerID adds the runner id to the influx data point for the current request.
@@ -99,6 +107,23 @@ func AddRequestSize(r *http.Request) {
 	addInfluxDBField(r, influxKeyRequestSize, len(body))
 }

+func ChangedPrewarmingPoolSize(id dto.EnvironmentID, count uint) {
+	p := influxdb2.NewPointWithMeasurement(measurementPoolSize)
+
+	p.AddTag(influxKeyEnvironmentID, strconv.Itoa(int(id)))
+	p.AddField(influxKeyEnvironmentPrewarmingPoolSize, count)
+
+	WriteInfluxPoint(p)
+}
+
+// WriteInfluxPoint schedules the influx data point to be sent.
+func WriteInfluxPoint(p *write.Point) {
+	if influxClient != nil {
+		p.AddTag("stage", config.Config.InfluxDB.Stage)
+		influxClient.WritePoint(p)
+	}
+}
+
 // addInfluxDBTag adds a tag to the influxdb data point in the request.
 func addInfluxDBTag(r *http.Request, key, value string) {
 	dataPointFromRequest(r).AddTag(key, value)
@@ -117,32 +142,3 @@ func dataPointFromRequest(r *http.Request) *write.Point {
 	}
 	return p
 }
-
-// getEnvironmentID tries to find the environment id in the influxdb data point.
-func getEnvironmentID(p *write.Point) string {
-	for _, tag := range p.TagList() {
-		if tag.Key == influxKeyEnvironmentID {
-			return tag.Value
-		}
-	}
-	return ""
-}
-
-// addEnvironmentData adds environment specific data to the influxdb data point.
-func addEnvironmentData(p *write.Point, manager environment.Manager, id dto.EnvironmentID) {
-	e, err := manager.Get(id, false)
-	if err == nil {
-		p.AddTag(influxKeyEnvironmentType, getType(e))
-		p.AddField(influxKeyEnvironmentIdleRunner, e.IdleRunnerCount())
-		p.AddField(influxKeyEnvironmentPrewarmingPoolSize, e.PrewarmingPoolSize())
-	}
-}
-
-// Get type returns the type of the passed execution environment.
-func getType(e runner.ExecutionEnvironment) string {
-	if t := reflect.TypeOf(e); t.Kind() == reflect.Ptr {
-		return t.Elem().Name()
-	} else {
-		return t.Name()
-	}
-}
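
Since the middleware now has the plain func(http.Handler) http.Handler shape and reads the client from the package singleton, it can be attached to any gorilla/mux router without further wiring. A hedged sketch of that wiring; the /health route, its name, and the port are invented for illustration, and the route name only determines the measurement name via mux.CurrentRoute:

    package main

    import (
        "net/http"

        "github.com/gorilla/mux"
        "github.com/openHPI/poseidon/internal/config"
        "github.com/openHPI/poseidon/pkg/monitoring"
    )

    func main() {
        cancel := monitoring.InitializeInfluxDB(&config.Config.InfluxDB)
        defer cancel()

        router := mux.NewRouter()
        // The route name ends up in the measurement name ("poseidon_" + name); "health" is an example.
        router.HandleFunc("/health", func(w http.ResponseWriter, r *http.Request) {
            w.WriteHeader(http.StatusNoContent)
        }).Name("health")
        router.Use(monitoring.InfluxDB2Middleware) // no client or environment manager argument anymore

        _ = http.ListenAndServe(":8080", router)
    }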

View File

@@ -1,6 +1,10 @@
 package storage

 import (
+	influxdb2 "github.com/influxdata/influxdb-client-go/v2"
+	"github.com/influxdata/influxdb-client-go/v2/api/write"
+	"github.com/openHPI/poseidon/pkg/monitoring"
+	"strconv"
 	"sync"
 )
@@ -36,10 +40,14 @@ type Storage[T any] interface {
 	Sample() (o T, ok bool)
 }

+type WriteCallback[T any] func(p *write.Point, object T, isDeletion bool)
+
 // localStorage stores objects in the local application memory.
 type localStorage[T any] struct {
 	sync.RWMutex
 	objects map[string]T
+	measurement string
+	callback    WriteCallback[T]
 }

 // NewLocalStorage responds with a Storage implementation.
@@ -50,6 +58,17 @@ func NewLocalStorage[T any]() *localStorage[T] {
 	}
 }

+// NewMonitoredLocalStorage responds with a Storage implementation.
+// All write operations are monitored in the passed measurement.
+// Iff callback is set, it will be called on a write operation.
+func NewMonitoredLocalStorage[T any](measurement string, callback WriteCallback[T]) *localStorage[T] {
+	return &localStorage[T]{
+		objects:     make(map[string]T),
+		measurement: measurement,
+		callback:    callback,
+	}
+}
+
 func (s *localStorage[T]) List() (o []T) {
 	s.RLock()
 	defer s.RUnlock()
@@ -63,6 +82,7 @@ func (s *localStorage[T]) Add(id string, o T) {
 	s.Lock()
 	defer s.Unlock()
 	s.objects[id] = o
+	s.sendMonitoringData(id, o, false)
 }

 func (s *localStorage[T]) Get(id string) (o T, ok bool) {
@@ -75,6 +95,7 @@ func (s *localStorage[T]) Get(id string) (o T, ok bool) {
 func (s *localStorage[T]) Delete(id string) {
 	s.Lock()
 	defer s.Unlock()
+	s.sendMonitoringData(id, s.objects[id], true)
 	delete(s.objects, id)
 }
@@ -87,6 +108,9 @@ func (s *localStorage[T]) Pop(id string) (T, bool) {
 func (s *localStorage[T]) Purge() {
 	s.Lock()
 	defer s.Unlock()
+	for key, object := range s.objects {
+		s.sendMonitoringData(key, object, true)
+	}
 	s.objects = make(map[string]T)
 }
@@ -94,6 +118,7 @@ func (s *localStorage[T]) Sample() (o T, ok bool) {
 	s.Lock()
 	defer s.Unlock()
 	for key, object := range s.objects {
+		s.sendMonitoringData(key, object, true)
 		delete(s.objects, key)
 		return object, true
 	}
@@ -105,3 +130,18 @@ func (s *localStorage[T]) Length() uint {
 	defer s.RUnlock()
 	return uint(len(s.objects))
 }
+
+func (s *localStorage[T]) sendMonitoringData(id string, o T, isDeletion bool) {
+	if s.measurement != "" {
+		p := influxdb2.NewPointWithMeasurement(s.measurement)
+		p.AddTag("id", id)
+		p.AddTag("isDeletion", strconv.FormatBool(isDeletion))
+		p.AddField("count", 1)
+
+		if s.callback != nil {
+			s.callback(p, o, isDeletion)
+		}
+
+		monitoring.WriteInfluxPoint(p)
+	}
+}
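
To close the loop, a usage sketch of the new monitored storage from a consumer's point of view. The element type, measurement name, and extra tag are invented, while NewMonitoredLocalStorage and WriteCallback are the API added above; points are only written once monitoring.InitializeInfluxDB has been called.

    package main

    import (
        "github.com/influxdata/influxdb-client-go/v2/api/write"

        "github.com/openHPI/poseidon/pkg/storage"
    )

    // job is a hypothetical element type used only for this example.
    type job struct{ queue string }

    func main() {
        // Add, Delete, Purge, and Sample on this storage each emit a point to the
        // hypothetical "poseidon_example_jobs" measurement with the id, isDeletion,
        // and count values set by sendMonitoringData, plus whatever the callback adds.
        jobs := storage.NewMonitoredLocalStorage[*job]("poseidon_example_jobs",
            func(p *write.Point, o *job, isDeletion bool) {
                if !isDeletion && o != nil {
                    p.AddTag("queue", o.queue) // hypothetical tag
                }
            })

        jobs.Add("job-1", &job{queue: "default"})
        jobs.Delete("job-1")
    }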