Enable memory oversubscription (#102)

* Enable memory oversubscription

* Fix and add e2e test
This commit is contained in:
Maximilian Paß
2022-03-18 08:31:27 +01:00
committed by GitHub
parent 708ae3679e
commit a41659eed4
8 changed files with 46 additions and 14 deletions

View File

@ -146,13 +146,15 @@ jobs:
wget -q "https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS" wget -q "https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS"
grep "nomad_${NOMAD_VERSION}_linux_amd64.zip" nomad_${NOMAD_VERSION}_SHA256SUMS | sha256sum -c - grep "nomad_${NOMAD_VERSION}_linux_amd64.zip" nomad_${NOMAD_VERSION}_SHA256SUMS | sha256sum -c -
unzip nomad_${NOMAD_VERSION}_linux_amd64.zip unzip nomad_${NOMAD_VERSION}_linux_amd64.zip
- name: Set Nomad Config
run: echo "server { default_scheduler_config { memory_oversubscription_enabled = true } }" > e2e-config.hcl
- name: Download Poseidon binary - name: Download Poseidon binary
uses: actions/download-artifact@v2 uses: actions/download-artifact@v2
with: with:
name: poseidon name: poseidon
- name: Run e2e tests - name: Run e2e tests
run: | run: |
sudo ./nomad agent -dev -log-level=WARN & sudo ./nomad agent -dev -log-level=WARN -config e2e-config.hcl &
until curl -s --fail http://localhost:4646/v1/agent/health ; do sleep 1; done until curl -s --fail http://localhost:4646/v1/agent/health ; do sleep 1; done
chmod +x ./poseidon chmod +x ./poseidon
./poseidon & ./poseidon &

View File

@ -27,7 +27,7 @@ components:
type: number type: number
example: 100 example: 100
memoryLimit: memoryLimit:
description: Memory limit for one runner in MB description: Memory limit for one runner in MB. Exceeding the limit may result in termination of the runner.
type: integer type: integer
example: 256 example: 256
networkAccess: networkAccess:

View File

@ -10,5 +10,6 @@ server {
# https://www.nomadproject.io/docs/configuration/server # https://www.nomadproject.io/docs/configuration/server
default_scheduler_config { default_scheduler_config {
scheduler_algorithm = "spread" scheduler_algorithm = "spread"
memory_oversubscription_enabled = true
} }
} }

View File

@ -104,15 +104,20 @@ func (n *NomadEnvironment) SetCPULimit(limit uint) {
func (n *NomadEnvironment) MemoryLimit() uint { func (n *NomadEnvironment) MemoryLimit() uint {
defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job) defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job)
defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup) defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup)
return uint(*defaultTask.Resources.MemoryMB) maxMemoryLimit := defaultTask.Resources.MemoryMaxMB
if maxMemoryLimit != nil {
return uint(*maxMemoryLimit)
} else {
return 0
}
} }
func (n *NomadEnvironment) SetMemoryLimit(limit uint) { func (n *NomadEnvironment) SetMemoryLimit(limit uint) {
defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job) defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job)
defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup) defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup)
integerMemoryLimit := int(limit) integerMemoryMaxLimit := int(limit)
defaultTask.Resources.MemoryMB = &integerMemoryLimit defaultTask.Resources.MemoryMaxMB = &integerMemoryMaxLimit
} }
func (n *NomadEnvironment) Image() string { func (n *NomadEnvironment) Image() string {

View File

@ -42,7 +42,7 @@ job "template-0" {
resources { resources {
cpu = 40 cpu = 40
memory = 40 memory = 16
} }
restart { restart {

View File

@ -24,11 +24,12 @@ import (
*/ */
var ( var (
log = logging.GetLogger("e2e") log = logging.GetLogger("e2e")
testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests") testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests")
nomadClient *nomadApi.Client nomadClient *nomadApi.Client
nomadNamespace string nomadNamespace string
environmentIDs []dto.EnvironmentID environmentIDs []dto.EnvironmentID
defaultNomadEnvironment dto.ExecutionEnvironmentRequest
) )
type E2ETestSuite struct { type E2ETestSuite struct {
@ -102,7 +103,7 @@ func createDefaultEnvironment() {
path := helpers.BuildURL(api.BasePath, api.EnvironmentsPath, tests.DefaultEnvironmentIDAsString) path := helpers.BuildURL(api.BasePath, api.EnvironmentsPath, tests.DefaultEnvironmentIDAsString)
request := dto.ExecutionEnvironmentRequest{ defaultNomadEnvironment = dto.ExecutionEnvironmentRequest{
PrewarmingPoolSize: 10, PrewarmingPoolSize: 10,
CPULimit: 100, CPULimit: 100,
MemoryLimit: 100, MemoryLimit: 100,
@ -111,7 +112,7 @@ func createDefaultEnvironment() {
ExposedPorts: nil, ExposedPorts: nil,
} }
resp, err := helpers.HTTPPutJSON(path, request) resp, err := helpers.HTTPPutJSON(path, defaultNomadEnvironment)
if err != nil || resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusNoContent { if err != nil || resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusNoContent {
log.WithError(err).Fatal("Couldn't create default environment for e2e tests") log.WithError(err).Fatal("Couldn't create default environment for e2e tests")
} }

View File

@ -364,7 +364,7 @@ func validateJob(t *testing.T, expected dto.ExecutionEnvironmentRequest) {
task := taskGroup.Tasks[0] task := taskGroup.Tasks[0]
assertEqualValueIntPointer(t, int(expected.CPULimit), task.Resources.CPU) assertEqualValueIntPointer(t, int(expected.CPULimit), task.Resources.CPU)
assertEqualValueIntPointer(t, int(expected.MemoryLimit), task.Resources.MemoryMB) assertEqualValueIntPointer(t, int(expected.MemoryLimit), task.Resources.MemoryMaxMB)
assert.Equal(t, expected.Image, task.Config["image"]) assert.Equal(t, expected.Image, task.Config["image"])
if expected.NetworkAccess { if expected.NetworkAccess {

View File

@ -13,6 +13,7 @@ import (
"github.com/openHPI/poseidon/tests/helpers" "github.com/openHPI/poseidon/tests/helpers"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"net/http" "net/http"
"strconv"
"strings" "strings"
"time" "time"
) )
@ -175,6 +176,28 @@ func (s *E2ETestSuite) TestEchoEnvironment() {
} }
} }
// TestMemoryMaxLimit_Nomad runs a command that tries to allocate more memory than the
// memory limit of the default environment and expects the execution to be killed:
// the WebSocket connection is closed normally, nothing arrives on stdout, and stderr
// contains "Killed".
func (s *E2ETestSuite) TestMemoryMaxLimit_Nomad() {
	maxMemoryLimit := defaultNomadEnvironment.MemoryLimit
	// The operating system is responsible for killing the process and sometimes tolerates
	// slightly exceeding the limit, so request 10% more than the configured limit.
	maxMemoryLimit = uint(1.1 * float64(maxMemoryLimit))

	connection, err := ProvideWebSocketConnection(&s.Suite, tests.DefaultEnvironmentIDAsInteger, &dto.ExecutionRequest{
		// This shell line tries to load maxMemoryLimit megabytes (note the "MB" suffix) into memory.
		Command: "</dev/zero head -c " + strconv.Itoa(int(maxMemoryLimit)) + "MB | tail > /dev/null",
	})
	s.Require().NoError(err)

	// The first message must announce the start of the execution.
	startMessage, err := helpers.ReceiveNextWebSocketMessage(connection)
	s.Require().NoError(err)
	s.Equal(dto.WebSocketMetaStart, startMessage.Type)

	// Reading until the connection ends is expected to finish with a normal close error.
	messages, err := helpers.ReceiveAllWebSocketMessages(connection)
	s.Require().Error(err)
	// Arguments are passed as (expected, actual) so that a failure report labels the values correctly.
	s.Equal(&websocket.CloseError{Code: websocket.CloseNormalClosure}, err)

	stdout, stderr, _ := helpers.WebSocketOutputMessages(messages)
	s.Empty(stdout)
	s.Contains(stderr, "Killed")
}
func (s *E2ETestSuite) TestNomadStderrFifoIsRemoved() { func (s *E2ETestSuite) TestNomadStderrFifoIsRemoved() {
runnerID, err := ProvideRunner(&dto.RunnerRequest{ runnerID, err := ProvideRunner(&dto.RunnerRequest{
ExecutionEnvironmentID: tests.DefaultEnvironmentIDAsInteger, ExecutionEnvironmentID: tests.DefaultEnvironmentIDAsInteger,