From a41659eed4ebb8bcce8e9218ba9665dc6ffc480d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Pa=C3=9F?= <22845248+mpass99@users.noreply.github.com> Date: Fri, 18 Mar 2022 08:31:27 +0100 Subject: [PATCH] Enable memory oversubscription (#102) * Enable memory oversubscription * Fix and add e2e test --- .github/workflows/ci.yml | 4 +++- api/swagger.yaml | 2 +- docs/resources/server.example.hcl | 1 + internal/environment/nomad_environment.go | 11 ++++++--- .../environment/template-environment-job.hcl | 2 +- tests/e2e/e2e_test.go | 15 ++++++------ tests/e2e/environments_test.go | 2 +- tests/e2e/websocket_test.go | 23 +++++++++++++++++++ 8 files changed, 46 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 180ff0c..19c3ff6 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -146,13 +146,15 @@ jobs: wget -q "https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS" grep "nomad_${NOMAD_VERSION}_linux_amd64.zip" nomad_${NOMAD_VERSION}_SHA256SUMS | sha256sum -c - unzip nomad_${NOMAD_VERSION}_linux_amd64.zip + - name: Set Nomad Config + run: echo "server { default_scheduler_config { memory_oversubscription_enabled = true } }" > e2e-config.hcl - name: Download Poseidon binary uses: actions/download-artifact@v2 with: name: poseidon - name: Run e2e tests run: | - sudo ./nomad agent -dev -log-level=WARN & + sudo ./nomad agent -dev -log-level=WARN -config e2e-config.hcl & until curl -s --fail http://localhost:4646/v1/agent/health ; do sleep 1; done chmod +x ./poseidon ./poseidon & diff --git a/api/swagger.yaml b/api/swagger.yaml index c08cbee..945d0ef 100644 --- a/api/swagger.yaml +++ b/api/swagger.yaml @@ -27,7 +27,7 @@ components: type: number example: 100 memoryLimit: - description: Memory limit for one runner in MB + description: Memory limit for one runner in MB. Exceeding the limit may result in termination of the runner. 
type: integer example: 256 networkAccess: diff --git a/docs/resources/server.example.hcl b/docs/resources/server.example.hcl index 4bf641a..3685128 100644 --- a/docs/resources/server.example.hcl +++ b/docs/resources/server.example.hcl @@ -10,5 +10,6 @@ server { # https://www.nomadproject.io/docs/configuration/server default_scheduler_config { scheduler_algorithm = "spread" + memory_oversubscription_enabled = true } } diff --git a/internal/environment/nomad_environment.go b/internal/environment/nomad_environment.go index cc5a46c..0a3cf04 100644 --- a/internal/environment/nomad_environment.go +++ b/internal/environment/nomad_environment.go @@ -104,15 +104,20 @@ func (n *NomadEnvironment) SetCPULimit(limit uint) { func (n *NomadEnvironment) MemoryLimit() uint { defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job) defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup) - return uint(*defaultTask.Resources.MemoryMB) + maxMemoryLimit := defaultTask.Resources.MemoryMaxMB + if maxMemoryLimit != nil { + return uint(*maxMemoryLimit) + } else { + return 0 + } } func (n *NomadEnvironment) SetMemoryLimit(limit uint) { defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job) defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup) - integerMemoryLimit := int(limit) - defaultTask.Resources.MemoryMB = &integerMemoryLimit + integerMemoryMaxLimit := int(limit) + defaultTask.Resources.MemoryMaxMB = &integerMemoryMaxLimit } func (n *NomadEnvironment) Image() string { diff --git a/internal/environment/template-environment-job.hcl b/internal/environment/template-environment-job.hcl index 646d9a3..6c245e5 100644 --- a/internal/environment/template-environment-job.hcl +++ b/internal/environment/template-environment-job.hcl @@ -42,7 +42,7 @@ job "template-0" { resources { cpu = 40 - memory = 40 + memory = 16 } restart { diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go index ee3dc5e..b065e31 100644 --- a/tests/e2e/e2e_test.go +++ 
b/tests/e2e/e2e_test.go @@ -24,11 +24,12 @@ import ( */ var ( - log = logging.GetLogger("e2e") - testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests") - nomadClient *nomadApi.Client - nomadNamespace string - environmentIDs []dto.EnvironmentID + log = logging.GetLogger("e2e") + testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests") + nomadClient *nomadApi.Client + nomadNamespace string + environmentIDs []dto.EnvironmentID + defaultNomadEnvironment dto.ExecutionEnvironmentRequest ) type E2ETestSuite struct { @@ -102,7 +103,7 @@ func createDefaultEnvironment() { path := helpers.BuildURL(api.BasePath, api.EnvironmentsPath, tests.DefaultEnvironmentIDAsString) - request := dto.ExecutionEnvironmentRequest{ + defaultNomadEnvironment = dto.ExecutionEnvironmentRequest{ PrewarmingPoolSize: 10, CPULimit: 100, MemoryLimit: 100, @@ -111,7 +112,7 @@ func createDefaultEnvironment() { ExposedPorts: nil, } - resp, err := helpers.HTTPPutJSON(path, request) + resp, err := helpers.HTTPPutJSON(path, defaultNomadEnvironment) if err != nil || resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusNoContent { log.WithError(err).Fatal("Couldn't create default environment for e2e tests") } diff --git a/tests/e2e/environments_test.go b/tests/e2e/environments_test.go index fe921b2..5baa2d2 100644 --- a/tests/e2e/environments_test.go +++ b/tests/e2e/environments_test.go @@ -364,7 +364,7 @@ func validateJob(t *testing.T, expected dto.ExecutionEnvironmentRequest) { task := taskGroup.Tasks[0] assertEqualValueIntPointer(t, int(expected.CPULimit), task.Resources.CPU) - assertEqualValueIntPointer(t, int(expected.MemoryLimit), task.Resources.MemoryMB) + assertEqualValueIntPointer(t, int(expected.MemoryLimit), task.Resources.MemoryMaxMB) assert.Equal(t, expected.Image, task.Config["image"]) if expected.NetworkAccess { diff --git a/tests/e2e/websocket_test.go b/tests/e2e/websocket_test.go index abb0085..cbe071d 100644 --- 
a/tests/e2e/websocket_test.go +++ b/tests/e2e/websocket_test.go @@ -13,6 +13,7 @@ import ( "github.com/openHPI/poseidon/tests/helpers" "github.com/stretchr/testify/suite" "net/http" + "strconv" "strings" "time" ) @@ -175,6 +176,28 @@ func (s *E2ETestSuite) TestEchoEnvironment() { } } +func (s *E2ETestSuite) TestMemoryMaxLimit_Nomad() { + maxMemoryLimit := defaultNomadEnvironment.MemoryLimit + // The operating system is responsible for killing the process and sometimes tolerates slightly exceeding the limit. + maxMemoryLimit = uint(1.1 * float64(maxMemoryLimit)) + connection, err := ProvideWebSocketConnection(&s.Suite, tests.DefaultEnvironmentIDAsInteger, &dto.ExecutionRequest{ + // This shell line tries to load maxMemoryLimit MB into memory. + Command: "</dev/zero head -c " + strconv.Itoa(int(maxMemoryLimit)) + "MB | tail > /dev/null", + }) + s.Require().NoError(err) + + startMessage, err := helpers.ReceiveNextWebSocketMessage(connection) + s.Require().NoError(err) + s.Equal(dto.WebSocketMetaStart, startMessage.Type) + + messages, err := helpers.ReceiveAllWebSocketMessages(connection) + s.Require().Error(err) + s.Equal(err, &websocket.CloseError{Code: websocket.CloseNormalClosure}) + stdout, stderr, _ := helpers.WebSocketOutputMessages(messages) + s.Empty(stdout) + s.Contains(stderr, "Killed") +} + func (s *E2ETestSuite) TestNomadStderrFifoIsRemoved() { runnerID, err := ProvideRunner(&dto.RunnerRequest{ ExecutionEnvironmentID: tests.DefaultEnvironmentIDAsInteger,