Add Prewarming Pool Alert

that checks for every environment whether the filled share of the prewarming pool is at least the specified threshold.
This commit is contained in:
Maximilian Paß
2023-10-31 13:06:53 +01:00
committed by Sebastian Serth
parent 1be3ce5ae1
commit c46a09eeae
9 changed files with 101 additions and 22 deletions

View File

@ -142,6 +142,7 @@ components:
- NOMAD_UNREACHABLE - NOMAD_UNREACHABLE
- NOMAD_OVERLOAD - NOMAD_OVERLOAD
- NOMAD_INTERNAL_SERVER_ERROR - NOMAD_INTERNAL_SERVER_ERROR
- PREWARMING_POOL_DEPLETING
- UNKNOWN - UNKNOWN
example: NOMAD_UNREACHABLE example: NOMAD_UNREACHABLE
@ -162,7 +163,8 @@ paths:
responses: responses:
"204": "204":
description: Everything okay description: Everything okay
"503":
$ref: "#/components/responses/InternalServerError"
/version: /version:
get: get:
summary: Retrieve the version of Poseidon summary: Retrieve the version of Poseidon

View File

@ -19,6 +19,10 @@ server:
interactivestderr: true interactivestderr: true
# If set, the file at the given path overwrites the default Nomad job file in internal/environment/template-environment-job.hcl # If set, the file at the given path overwrites the default Nomad job file in internal/environment/template-environment-job.hcl
# templatejobfile: ./poseidon.hcl # templatejobfile: ./poseidon.hcl
# The prewarming pool alert threshold [0, 1) defines which part of the prewarming pool should always be filled.
# Setting it to 0 will disable the alert.
# If less than the configured share (e.g., 50%) of the prewarming pool is filled, the health route of Poseidon will return a warning.
prewarmingpoolalertthreshold: 0.5
# Configuration of the used Nomad cluster # Configuration of the used Nomad cluster
nomad: nomad:

View File

@ -46,7 +46,7 @@ func configureV1Router(router *mux.Router,
w.WriteHeader(http.StatusNotFound) w.WriteHeader(http.StatusNotFound)
}) })
v1 := router.PathPrefix(BasePath).Subrouter() v1 := router.PathPrefix(BasePath).Subrouter()
v1.HandleFunc(HealthPath, Health).Methods(http.MethodGet).Name(HealthPath) v1.HandleFunc(HealthPath, Health(environmentManager)).Methods(http.MethodGet).Name(HealthPath)
v1.HandleFunc(VersionPath, Version).Methods(http.MethodGet).Name(VersionPath) v1.HandleFunc(VersionPath, Version).Methods(http.MethodGet).Name(VersionPath)
runnerController := &RunnerController{manager: runnerManager} runnerController := &RunnerController{manager: runnerManager}

View File

@ -1,12 +1,42 @@
package api package api
import ( import (
"errors"
"fmt"
"github.com/openHPI/poseidon/internal/config"
"github.com/openHPI/poseidon/internal/environment"
"github.com/openHPI/poseidon/pkg/dto"
"net/http" "net/http"
"strings"
) )
var ErrorPrewarmingPoolDepleting = errors.New("the prewarming pool is depleting")
// Health handles the health route. // Health handles the health route.
// It responds that the server is alive. // It responds that the server is alive.
// If it is not, the response won't reach the client. // If it is not, the response won't reach the client.
func Health(writer http.ResponseWriter, _ *http.Request) { func Health(manager environment.Manager) http.HandlerFunc {
writer.WriteHeader(http.StatusNoContent) return func(writer http.ResponseWriter, request *http.Request) {
if err := checkPrewarmingPool(manager); err != nil {
sendJSON(writer, &dto.InternalServerError{Message: err.Error(), ErrorCode: dto.PrewarmingPoolDepleting},
http.StatusServiceUnavailable, request.Context())
return
}
writer.WriteHeader(http.StatusNoContent)
}
}
// checkPrewarmingPool inspects the statistics of every environment known to the manager and
// collects the IDs of those whose share of idle runners (IdleRunners / PrewarmingPoolSize)
// is below config.Config.Server.PrewarmingPoolAlertThreshold.
// It returns nil if no environment is below the threshold. Otherwise, it returns an error
// wrapping ErrorPrewarmingPoolDepleting that lists the affected environment IDs, comma-separated.
func checkPrewarmingPool(manager environment.Manager) error {
var depletingEnvironments []int
// NOTE(review): Statistics() appears to return a map (see the mock in the test); if so,
// the order of the IDs in the resulting error message is not deterministic - confirm.
for _, data := range manager.Statistics() {
// The division is done in floating point, so a PrewarmingPoolSize of 0 does not panic:
// the quotient is NaN or +Inf (assuming non-negative counts), neither of which is less than
// the threshold, so such an environment is never reported.
// A threshold of 0 never matches for non-negative shares, which disables the alert.
if float64(data.IdleRunners)/float64(data.PrewarmingPoolSize) < config.Config.Server.PrewarmingPoolAlertThreshold {
depletingEnvironments = append(depletingEnvironments, data.ID)
}
}
if len(depletingEnvironments) > 0 {
// fmt.Sprint renders the slice as "[1 2 3]"; splitting on whitespace, joining with commas,
// and trimming the brackets turns it into "1,2,3".
arrayToString := strings.Trim(strings.Join(strings.Fields(fmt.Sprint(depletingEnvironments)), ","), "[]")
return fmt.Errorf("%w: environments %s", ErrorPrewarmingPoolDepleting, arrayToString)
}
return nil
} }

View File

@ -1,16 +1,55 @@
package api package api
import ( import (
"encoding/json"
"github.com/openHPI/poseidon/internal/config"
"github.com/openHPI/poseidon/internal/environment"
"github.com/openHPI/poseidon/pkg/dto"
"github.com/openHPI/poseidon/tests"
"io"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
) )
func (s *MainTestSuite) TestHealthRoute() { func (s *MainTestSuite) TestHealth() {
request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody) s.Run("returns StatusNoContent as default", func() {
if err != nil { request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
s.T().Fatal(err) if err != nil {
} s.T().Fatal(err)
recorder := httptest.NewRecorder() }
http.HandlerFunc(Health).ServeHTTP(recorder, request) recorder := httptest.NewRecorder()
s.Equal(http.StatusNoContent, recorder.Code) manager := &environment.ManagerHandlerMock{}
manager.On("Statistics").Return(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{})
Health(manager).ServeHTTP(recorder, request)
s.Equal(http.StatusNoContent, recorder.Code)
})
s.Run("returns InternalServerError for warnings and errors", func() {
s.Run("Prewarming Pool Alert", func() {
request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
if err != nil {
s.T().Fatal(err)
}
recorder := httptest.NewRecorder()
manager := &environment.ManagerHandlerMock{}
manager.On("Statistics").Return(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{
tests.DefaultEnvironmentIDAsInteger: {
ID: tests.DefaultEnvironmentIDAsInteger,
PrewarmingPoolSize: 3,
IdleRunners: 1,
},
})
config.Config.Server.PrewarmingPoolAlertThreshold = 0.5
Health(manager).ServeHTTP(recorder, request)
s.Equal(http.StatusServiceUnavailable, recorder.Code)
b, err := io.ReadAll(recorder.Body)
s.Require().NoError(err)
var details dto.InternalServerError
err = json.Unmarshal(b, &details)
s.Require().NoError(err)
s.Contains(details.Message, ErrorPrewarmingPoolDepleting.Error())
})
})
} }

View File

@ -29,8 +29,9 @@ var (
CertFile: "", CertFile: "",
KeyFile: "", KeyFile: "",
}, },
InteractiveStderr: true, InteractiveStderr: true,
TemplateJobFile: "", TemplateJobFile: "",
PrewarmingPoolAlertThreshold: 0,
}, },
Nomad: Nomad{ Nomad: Nomad{
Enabled: true, Enabled: true,
@ -81,12 +82,13 @@ var (
// server configures the Poseidon webserver. // server configures the Poseidon webserver.
type server struct { type server struct {
Address string Address string
Port int Port int
Token string Token string
TLS TLS TLS TLS
InteractiveStderr bool InteractiveStderr bool
TemplateJobFile string TemplateJobFile string
PrewarmingPoolAlertThreshold float64
} }
// URL returns the URL of the Poseidon webserver. // URL returns the URL of the Poseidon webserver.

View File

@ -60,8 +60,9 @@ func (a *AWSEnvironment) Sample() (r runner.Runner, ok bool) {
// The following methods are not supported at this moment. // The following methods are not supported at this moment.
// IdleRunnerCount is not supported as we have no information about the AWS managed prewarming pool. // IdleRunnerCount is not supported as we have no information about the AWS managed prewarming pool.
// For the Poseidon Health check we default to 1.
func (a *AWSEnvironment) IdleRunnerCount() uint { func (a *AWSEnvironment) IdleRunnerCount() uint {
return 0 return 1
} }
// PrewarmingPoolSize is neither supported nor required. It is handled transparently by AWS. // PrewarmingPoolSize is neither supported nor required. It is handled transparently by AWS.

View File

@ -328,5 +328,6 @@ const (
ErrorNomadUnreachable ErrorCode = "NOMAD_UNREACHABLE" ErrorNomadUnreachable ErrorCode = "NOMAD_UNREACHABLE"
ErrorNomadOverload ErrorCode = "NOMAD_OVERLOAD" ErrorNomadOverload ErrorCode = "NOMAD_OVERLOAD"
ErrorNomadInternalServerError ErrorCode = "NOMAD_INTERNAL_SERVER_ERROR" ErrorNomadInternalServerError ErrorCode = "NOMAD_INTERNAL_SERVER_ERROR"
PrewarmingPoolDepleting ErrorCode = "PREWARMING_POOL_DEPLETING"
ErrorUnknown ErrorCode = "UNKNOWN" ErrorUnknown ErrorCode = "UNKNOWN"
) )