diff --git a/api/swagger.yaml b/api/swagger.yaml
index 0acf2a1..8d84475 100644
--- a/api/swagger.yaml
+++ b/api/swagger.yaml
@@ -142,6 +142,7 @@ components:
                   - NOMAD_UNREACHABLE
                   - NOMAD_OVERLOAD
                   - NOMAD_INTERNAL_SERVER_ERROR
+                  - PREWARMING_POOL_DEPLETING
                   - UNKNOWN
                 example: NOMAD_UNREACHABLE
 
@@ -162,7 +163,8 @@ paths:
       responses:
         "204":
           description: Everything okay
-
+        "503":
+          $ref: "#/components/responses/InternalServerError"
   /version:
     get:
       summary: Retrieve the version of Poseidon
diff --git a/configuration.example.yaml b/configuration.example.yaml
index a66191a..3f2c5f5 100644
--- a/configuration.example.yaml
+++ b/configuration.example.yaml
@@ -19,6 +19,10 @@ server:
   interactivestderr: true
   # If set, the file at the given path overwrites the default Nomad job file in internal/environment/template-environment-job.hcl
   # templatejobfile: ./poseidon.hcl
+  # The prewarming pool alert threshold [0, 1) defines which part of the prewarming pool should always be filled.
+  # Setting it to 0 will disable the alert.
+  # If the prewarming pool is filled for less than, e.g., 50%, the health route of Poseidon will return a warning.
+  prewarmingpoolalertthreshold: 0.5
 
 # Configuration of the used Nomad cluster
 nomad:
diff --git a/deploy/codeocean-terraform b/deploy/codeocean-terraform
index 77e99a5..2717dd9 160000
--- a/deploy/codeocean-terraform
+++ b/deploy/codeocean-terraform
@@ -1 +1 @@
-Subproject commit 77e99a52e04a33be897d058b265080d7bbd5ea42
+Subproject commit 2717dd9ad672988980c30dce5619639aeb4570d4
diff --git a/internal/api/api.go b/internal/api/api.go
index 0af22a4..aff2e64 100644
--- a/internal/api/api.go
+++ b/internal/api/api.go
@@ -46,7 +46,7 @@ func configureV1Router(router *mux.Router,
 		w.WriteHeader(http.StatusNotFound)
 	})
 	v1 := router.PathPrefix(BasePath).Subrouter()
-	v1.HandleFunc(HealthPath, Health).Methods(http.MethodGet).Name(HealthPath)
+	v1.HandleFunc(HealthPath, Health(environmentManager)).Methods(http.MethodGet).Name(HealthPath)
 	v1.HandleFunc(VersionPath, Version).Methods(http.MethodGet).Name(VersionPath)
 
 	runnerController := &RunnerController{manager: runnerManager}
diff --git a/internal/api/health.go b/internal/api/health.go
index f3cd795..05acc61 100644
--- a/internal/api/health.go
+++ b/internal/api/health.go
@@ -1,12 +1,62 @@
 package api
 
 import (
+	"errors"
+	"fmt"
+	"github.com/openHPI/poseidon/internal/config"
+	"github.com/openHPI/poseidon/internal/environment"
+	"github.com/openHPI/poseidon/pkg/dto"
 	"net/http"
+	"sort"
+	"strconv"
+	"strings"
 )
 
+// ErrorPrewarmingPoolDepleting indicates that the prewarming pool of at least one environment is below the alert threshold.
+var ErrorPrewarmingPoolDepleting = errors.New("the prewarming pool is depleting")
+
 // Health handles the health route.
-// It responds that the server is alive.
-// If it is not, the response won't reach the client.
-func Health(writer http.ResponseWriter, _ *http.Request) {
-	writer.WriteHeader(http.StatusNoContent)
+// It responds with StatusNoContent if the server is alive and no prewarming pool is depleting.
+// If a prewarming pool is depleting, it responds with StatusServiceUnavailable and lists the affected environments.
+func Health(manager environment.Manager) http.HandlerFunc {
+	return func(writer http.ResponseWriter, request *http.Request) {
+		if err := checkPrewarmingPool(manager); err != nil {
+			sendJSON(writer, &dto.InternalServerError{Message: err.Error(), ErrorCode: dto.PrewarmingPoolDepleting},
+				http.StatusServiceUnavailable, request.Context())
+			return
+		}
+
+		writer.WriteHeader(http.StatusNoContent)
+	}
+}
+
+// checkPrewarmingPool returns an error wrapping ErrorPrewarmingPoolDepleting that lists, in ascending order,
+// the IDs of all environments whose share of idle runners is below the configured alert threshold, and nil otherwise.
+func checkPrewarmingPool(manager environment.Manager) error {
+	threshold := config.Config.Server.PrewarmingPoolAlertThreshold
+	if threshold <= 0 {
+		return nil // The alert is disabled.
+	}
+
+	var depletingEnvironments []int
+	for _, data := range manager.Statistics() {
+		// An environment without a prewarming pool cannot deplete; skipping it also avoids dividing by zero.
+		if data.PrewarmingPoolSize == 0 {
+			continue
+		}
+		if float64(data.IdleRunners)/float64(data.PrewarmingPoolSize) < threshold {
+			depletingEnvironments = append(depletingEnvironments, data.ID)
+		}
+	}
+	if len(depletingEnvironments) == 0 {
+		return nil
+	}
+
+	// Map iteration order is random; sort the IDs to keep the message stable between calls.
+	sort.Ints(depletingEnvironments)
+	ids := make([]string, 0, len(depletingEnvironments))
+	for _, id := range depletingEnvironments {
+		ids = append(ids, strconv.Itoa(id))
+	}
+	return fmt.Errorf("%w: environments %s", ErrorPrewarmingPoolDepleting, strings.Join(ids, ","))
 }
diff --git a/internal/api/health_test.go b/internal/api/health_test.go
index f06b82d..4338b50 100644
--- a/internal/api/health_test.go
+++ b/internal/api/health_test.go
@@ -1,16 +1,58 @@
 package api
 
 import (
+	"encoding/json"
+	"github.com/openHPI/poseidon/internal/config"
+	"github.com/openHPI/poseidon/internal/environment"
+	"github.com/openHPI/poseidon/pkg/dto"
+	"github.com/openHPI/poseidon/tests"
+	"io"
 	"net/http"
 	"net/http/httptest"
 )
 
-func (s *MainTestSuite) TestHealthRoute() {
-	request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
-	if err != nil {
-		s.T().Fatal(err)
-	}
-	recorder := httptest.NewRecorder()
-	http.HandlerFunc(Health).ServeHTTP(recorder, request)
-	s.Equal(http.StatusNoContent, recorder.Code)
+func (s *MainTestSuite) TestHealth() {
+	s.Run("returns StatusNoContent as default", func() {
+		request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
+		if err != nil {
+			s.T().Fatal(err)
+		}
+		recorder := httptest.NewRecorder()
+		manager := &environment.ManagerHandlerMock{}
+		manager.On("Statistics").Return(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{})
+
+		Health(manager).ServeHTTP(recorder, request)
+		s.Equal(http.StatusNoContent, recorder.Code)
+	})
+	s.Run("returns InternalServerError for warnings and errors", func() {
+		s.Run("Prewarming Pool Alert", func() {
+			request, err := http.NewRequest(http.MethodGet, "/health", http.NoBody)
+			if err != nil {
+				s.T().Fatal(err)
+			}
+			recorder := httptest.NewRecorder()
+			manager := &environment.ManagerHandlerMock{}
+			manager.On("Statistics").Return(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{
+				tests.DefaultEnvironmentIDAsInteger: {
+					ID:                 tests.DefaultEnvironmentIDAsInteger,
+					PrewarmingPoolSize: 3,
+					IdleRunners:        1,
+				},
+			})
+			// Restore the global configuration afterwards so that other tests are not affected.
+			previousThreshold := config.Config.Server.PrewarmingPoolAlertThreshold
+			defer func() { config.Config.Server.PrewarmingPoolAlertThreshold = previousThreshold }()
+			config.Config.Server.PrewarmingPoolAlertThreshold = 0.5
+
+			Health(manager).ServeHTTP(recorder, request)
+			s.Equal(http.StatusServiceUnavailable, recorder.Code)
+
+			b, err := io.ReadAll(recorder.Body)
+			s.Require().NoError(err)
+			var details dto.InternalServerError
+			err = json.Unmarshal(b, &details)
+			s.Require().NoError(err)
+			s.Contains(details.Message, ErrorPrewarmingPoolDepleting.Error())
+		})
+	})
 }
diff --git a/internal/config/config.go b/internal/config/config.go
index eb9abbd..bb7d4f4 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -29,8 +29,9 @@ var (
 				CertFile: "",
 				KeyFile:  "",
 			},
-			InteractiveStderr: true,
-			TemplateJobFile:   "",
+			InteractiveStderr:            true,
+			TemplateJobFile:              "",
+			PrewarmingPoolAlertThreshold: 0,
 		},
 		Nomad: Nomad{
 			Enabled: true,
@@ -81,12 +82,14 @@ var (
 
 // server configures the Poseidon webserver.
 type server struct {
-	Address           string
-	Port              int
-	Token             string
-	TLS               TLS
-	InteractiveStderr bool
-	TemplateJobFile   string
+	Address                      string
+	Port                         int
+	Token                        string
+	TLS                          TLS
+	InteractiveStderr            bool
+	TemplateJobFile              string
+	// PrewarmingPoolAlertThreshold [0, 1) is the share of the prewarming pool that should stay filled; 0 disables the alert.
+	PrewarmingPoolAlertThreshold float64
 }
 
 // URL returns the URL of the Poseidon webserver.
diff --git a/internal/environment/aws_environment.go b/internal/environment/aws_environment.go
index eb10978..c9e1a45 100644
--- a/internal/environment/aws_environment.go
+++ b/internal/environment/aws_environment.go
@@ -60,8 +60,9 @@ func (a *AWSEnvironment) Sample() (r runner.Runner, ok bool) {
 // The following methods are not supported at this moment.
 
 // IdleRunnerCount is not supported as we have no information about the AWS managed prewarming pool.
+// For the Poseidon Health check we default to 1.
 func (a *AWSEnvironment) IdleRunnerCount() uint {
-	return 0
+	return 1
 }
 
 // PrewarmingPoolSize is neither supported nor required. It is handled transparently by AWS.
diff --git a/pkg/dto/dto.go b/pkg/dto/dto.go
index 56f1f01..217a859 100644
--- a/pkg/dto/dto.go
+++ b/pkg/dto/dto.go
@@ -328,5 +328,6 @@ const (
 	ErrorNomadUnreachable         ErrorCode = "NOMAD_UNREACHABLE"
 	ErrorNomadOverload            ErrorCode = "NOMAD_OVERLOAD"
 	ErrorNomadInternalServerError ErrorCode = "NOMAD_INTERNAL_SERVER_ERROR"
+	PrewarmingPoolDepleting       ErrorCode = "PREWARMING_POOL_DEPLETING"
 	ErrorUnknown                  ErrorCode = "UNKNOWN"
 )