Add Prewarming Pool Alert
that checks for every environment whether the filled share of the prewarming pool is at least the specified threshold.
This commit is contained in:

committed by
Sebastian Serth

parent
1be3ce5ae1
commit
c46a09eeae
@ -142,6 +142,7 @@ components:
|
|||||||
- NOMAD_UNREACHABLE
|
- NOMAD_UNREACHABLE
|
||||||
- NOMAD_OVERLOAD
|
- NOMAD_OVERLOAD
|
||||||
- NOMAD_INTERNAL_SERVER_ERROR
|
- NOMAD_INTERNAL_SERVER_ERROR
|
||||||
|
- PREWARMING_POOL_DEPLETING
|
||||||
- UNKNOWN
|
- UNKNOWN
|
||||||
example: NOMAD_UNREACHABLE
|
example: NOMAD_UNREACHABLE
|
||||||
|
|
||||||
@ -162,7 +163,8 @@ paths:
|
|||||||
responses:
|
responses:
|
||||||
"204":
|
"204":
|
||||||
description: Everything okay
|
description: Everything okay
|
||||||
|
"503":
|
||||||
|
$ref: "#/components/responses/InternalServerError"
|
||||||
/version:
|
/version:
|
||||||
get:
|
get:
|
||||||
summary: Retrieve the version of Poseidon
|
summary: Retrieve the version of Poseidon
|
||||||
|
@ -19,6 +19,10 @@ server:
|
|||||||
interactivestderr: true
|
interactivestderr: true
|
||||||
# If set, the file at the given path overwrites the default Nomad job file in internal/environment/template-environment-job.hcl
|
# If set, the file at the given path overwrites the default Nomad job file in internal/environment/template-environment-job.hcl
|
||||||
# templatejobfile: ./poseidon.hcl
|
# templatejobfile: ./poseidon.hcl
|
||||||
|
# The prewarming pool alert threshold [0, 1) defines which part of the prewarming pool should always be filled.
|
||||||
|
# Setting it to 0 will disable the alert.
|
||||||
|
# If the prewarming pool is filled to less than, e.g., 50%, the health route of Poseidon will return a warning.
|
||||||
|
prewarmingpoolalertthreshold: 0.5
|
||||||
|
|
||||||
# Configuration of the used Nomad cluster
|
# Configuration of the used Nomad cluster
|
||||||
nomad:
|
nomad:
|
||||||
|
Submodule deploy/codeocean-terraform updated: 77e99a52e0...2717dd9ad6
@ -46,7 +46,7 @@ func configureV1Router(router *mux.Router,
|
|||||||
w.WriteHeader(http.StatusNotFound)
|
w.WriteHeader(http.StatusNotFound)
|
||||||
})
|
})
|
||||||
v1 := router.PathPrefix(BasePath).Subrouter()
|
v1 := router.PathPrefix(BasePath).Subrouter()
|
||||||
v1.HandleFunc(HealthPath, Health).Methods(http.MethodGet).Name(HealthPath)
|
v1.HandleFunc(HealthPath, Health(environmentManager)).Methods(http.MethodGet).Name(HealthPath)
|
||||||
v1.HandleFunc(VersionPath, Version).Methods(http.MethodGet).Name(VersionPath)
|
v1.HandleFunc(VersionPath, Version).Methods(http.MethodGet).Name(VersionPath)
|
||||||
|
|
||||||
runnerController := &RunnerController{manager: runnerManager}
|
runnerController := &RunnerController{manager: runnerManager}
|
||||||
|
@ -1,12 +1,42 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"github.com/openHPI/poseidon/internal/config"
|
||||||
|
"github.com/openHPI/poseidon/internal/environment"
|
||||||
|
"github.com/openHPI/poseidon/pkg/dto"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var ErrorPrewarmingPoolDepleting = errors.New("the prewarming pool is depleting")
|
||||||
|
|
||||||
// Health handles the health route.
|
// Health handles the health route.
|
||||||
// It responds that the server is alive.
|
// It responds that the server is alive.
|
||||||
// If it is not, the response won't reach the client.
|
// If it is not, the response won't reach the client.
|
||||||
func Health(writer http.ResponseWriter, _ *http.Request) {
|
func Health(manager environment.Manager) http.HandlerFunc {
|
||||||
writer.WriteHeader(http.StatusNoContent)
|
return func(writer http.ResponseWriter, request *http.Request) {
|
||||||
|
if err := checkPrewarmingPool(manager); err != nil {
|
||||||
|
sendJSON(writer, &dto.InternalServerError{Message: err.Error(), ErrorCode: dto.PrewarmingPoolDepleting},
|
||||||
|
http.StatusServiceUnavailable, request.Context())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.WriteHeader(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func checkPrewarmingPool(manager environment.Manager) error {
|
||||||
|
var depletingEnvironments []int
|
||||||
|
for _, data := range manager.Statistics() {
|
||||||
|
if float64(data.IdleRunners)/float64(data.PrewarmingPoolSize) < config.Config.Server.PrewarmingPoolAlertThreshold {
|
||||||
|
depletingEnvironments = append(depletingEnvironments, data.ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(depletingEnvironments) > 0 {
|
||||||
|
arrayToString := strings.Trim(strings.Join(strings.Fields(fmt.Sprint(depletingEnvironments)), ","), "[]")
|
||||||
|
return fmt.Errorf("%w: environments %s", ErrorPrewarmingPoolDepleting, arrayToString)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1,16 +1,55 @@
|
|||||||
package api
|
package api
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"github.com/openHPI/poseidon/internal/config"
|
||||||
|
"github.com/openHPI/poseidon/internal/environment"
|
||||||
|
"github.com/openHPI/poseidon/pkg/dto"
|
||||||
|
"github.com/openHPI/poseidon/tests"
|
||||||
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (s *MainTestSuite) TestHealthRoute() {
|
func (s *MainTestSuite) TestHealth() {
|
||||||
request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
|
s.Run("returns StatusNoContent as default", func() {
|
||||||
if err != nil {
|
request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
|
||||||
s.T().Fatal(err)
|
if err != nil {
|
||||||
}
|
s.T().Fatal(err)
|
||||||
recorder := httptest.NewRecorder()
|
}
|
||||||
http.HandlerFunc(Health).ServeHTTP(recorder, request)
|
recorder := httptest.NewRecorder()
|
||||||
s.Equal(http.StatusNoContent, recorder.Code)
|
manager := &environment.ManagerHandlerMock{}
|
||||||
|
manager.On("Statistics").Return(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{})
|
||||||
|
|
||||||
|
Health(manager).ServeHTTP(recorder, request)
|
||||||
|
s.Equal(http.StatusNoContent, recorder.Code)
|
||||||
|
})
|
||||||
|
s.Run("returns InternalServerError for warnings and errors", func() {
|
||||||
|
s.Run("Prewarming Pool Alert", func() {
|
||||||
|
request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
|
||||||
|
if err != nil {
|
||||||
|
s.T().Fatal(err)
|
||||||
|
}
|
||||||
|
recorder := httptest.NewRecorder()
|
||||||
|
manager := &environment.ManagerHandlerMock{}
|
||||||
|
manager.On("Statistics").Return(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{
|
||||||
|
tests.DefaultEnvironmentIDAsInteger: {
|
||||||
|
ID: tests.DefaultEnvironmentIDAsInteger,
|
||||||
|
PrewarmingPoolSize: 3,
|
||||||
|
IdleRunners: 1,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
config.Config.Server.PrewarmingPoolAlertThreshold = 0.5
|
||||||
|
|
||||||
|
Health(manager).ServeHTTP(recorder, request)
|
||||||
|
s.Equal(http.StatusServiceUnavailable, recorder.Code)
|
||||||
|
|
||||||
|
b, err := io.ReadAll(recorder.Body)
|
||||||
|
s.Require().NoError(err)
|
||||||
|
var details dto.InternalServerError
|
||||||
|
err = json.Unmarshal(b, &details)
|
||||||
|
s.Require().NoError(err)
|
||||||
|
s.Contains(details.Message, ErrorPrewarmingPoolDepleting.Error())
|
||||||
|
})
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
@ -29,8 +29,9 @@ var (
|
|||||||
CertFile: "",
|
CertFile: "",
|
||||||
KeyFile: "",
|
KeyFile: "",
|
||||||
},
|
},
|
||||||
InteractiveStderr: true,
|
InteractiveStderr: true,
|
||||||
TemplateJobFile: "",
|
TemplateJobFile: "",
|
||||||
|
PrewarmingPoolAlertThreshold: 0,
|
||||||
},
|
},
|
||||||
Nomad: Nomad{
|
Nomad: Nomad{
|
||||||
Enabled: true,
|
Enabled: true,
|
||||||
@ -81,12 +82,13 @@ var (
|
|||||||
|
|
||||||
// server configures the Poseidon webserver.
|
// server configures the Poseidon webserver.
|
||||||
type server struct {
|
type server struct {
|
||||||
Address string
|
Address string
|
||||||
Port int
|
Port int
|
||||||
Token string
|
Token string
|
||||||
TLS TLS
|
TLS TLS
|
||||||
InteractiveStderr bool
|
InteractiveStderr bool
|
||||||
TemplateJobFile string
|
TemplateJobFile string
|
||||||
|
PrewarmingPoolAlertThreshold float64
|
||||||
}
|
}
|
||||||
|
|
||||||
// URL returns the URL of the Poseidon webserver.
|
// URL returns the URL of the Poseidon webserver.
|
||||||
|
@ -60,8 +60,9 @@ func (a *AWSEnvironment) Sample() (r runner.Runner, ok bool) {
|
|||||||
// The following methods are not supported at this moment.
|
// The following methods are not supported at this moment.
|
||||||
|
|
||||||
// IdleRunnerCount is not supported as we have no information about the AWS managed prewarming pool.
|
// IdleRunnerCount is not supported as we have no information about the AWS managed prewarming pool.
|
||||||
|
// For the Poseidon Health check we default to 1.
|
||||||
func (a *AWSEnvironment) IdleRunnerCount() uint {
|
func (a *AWSEnvironment) IdleRunnerCount() uint {
|
||||||
return 0
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// PrewarmingPoolSize is neither supported nor required. It is handled transparently by AWS.
|
// PrewarmingPoolSize is neither supported nor required. It is handled transparently by AWS.
|
||||||
|
@ -328,5 +328,6 @@ const (
|
|||||||
ErrorNomadUnreachable ErrorCode = "NOMAD_UNREACHABLE"
|
ErrorNomadUnreachable ErrorCode = "NOMAD_UNREACHABLE"
|
||||||
ErrorNomadOverload ErrorCode = "NOMAD_OVERLOAD"
|
ErrorNomadOverload ErrorCode = "NOMAD_OVERLOAD"
|
||||||
ErrorNomadInternalServerError ErrorCode = "NOMAD_INTERNAL_SERVER_ERROR"
|
ErrorNomadInternalServerError ErrorCode = "NOMAD_INTERNAL_SERVER_ERROR"
|
||||||
|
PrewarmingPoolDepleting ErrorCode = "PREWARMING_POOL_DEPLETING"
|
||||||
ErrorUnknown ErrorCode = "UNKNOWN"
|
ErrorUnknown ErrorCode = "UNKNOWN"
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user