Enable memory oversubscription (#102)

* Enable memory oversubscription

* Fix and add e2e test
This commit is contained in:
Maximilian Paß
2022-03-18 08:31:27 +01:00
committed by GitHub
parent 708ae3679e
commit a41659eed4
8 changed files with 46 additions and 14 deletions

View File

@ -146,13 +146,15 @@ jobs:
wget -q "https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_SHA256SUMS"
grep "nomad_${NOMAD_VERSION}_linux_amd64.zip" nomad_${NOMAD_VERSION}_SHA256SUMS | sha256sum -c -
unzip nomad_${NOMAD_VERSION}_linux_amd64.zip
- name: Set Nomad Config
run: echo "server { default_scheduler_config { memory_oversubscription_enabled = true } }" > e2e-config.hcl
- name: Download Poseidon binary
uses: actions/download-artifact@v2
with:
name: poseidon
- name: Run e2e tests
run: |
sudo ./nomad agent -dev -log-level=WARN &
sudo ./nomad agent -dev -log-level=WARN -config e2e-config.hcl &
until curl -s --fail http://localhost:4646/v1/agent/health ; do sleep 1; done
chmod +x ./poseidon
./poseidon &

View File

@ -27,7 +27,7 @@ components:
type: number
example: 100
memoryLimit:
description: Memory limit for one runner in MB
description: Memory limit for one runner in MB. Exceeding the limit may result in termination of the runner.
type: integer
example: 256
networkAccess:

View File

@ -10,5 +10,6 @@ server {
# https://www.nomadproject.io/docs/configuration/server
default_scheduler_config {
scheduler_algorithm = "spread"
memory_oversubscription_enabled = true
}
}

View File

@ -104,15 +104,20 @@ func (n *NomadEnvironment) SetCPULimit(limit uint) {
// MemoryLimit returns the memory limit read from the resources of the job's default task.
// NOTE(review): this listing looks like a rendered diff without +/- markers, so the old and
// the new body seem to appear back to back - confirm against the commit before editing.
func (n *NomadEnvironment) MemoryLimit() uint {
defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job)
defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup)
// Presumably the pre-change return: reports Resources.MemoryMB.
return uint(*defaultTask.Resources.MemoryMB)
// Presumably the post-change body: reports Resources.MemoryMaxMB and
// falls back to 0 when that pointer is nil.
maxMemoryLimit := defaultTask.Resources.MemoryMaxMB
if maxMemoryLimit != nil {
return uint(*maxMemoryLimit)
} else {
return 0
}
}
// SetMemoryLimit stores limit in the resources of the job's default task.
// NOTE(review): this listing looks like a rendered diff without +/- markers, so the old and
// the new assignment seem to appear back to back - confirm against the commit before editing.
func (n *NomadEnvironment) SetMemoryLimit(limit uint) {
defaultTaskGroup := nomad.FindAndValidateDefaultTaskGroup(n.job)
defaultTask := nomad.FindAndValidateDefaultTask(defaultTaskGroup)
// Presumably the pre-change assignment: writes the limit to Resources.MemoryMB.
// The field takes a pointer, hence the local int copy.
integerMemoryLimit := int(limit)
defaultTask.Resources.MemoryMB = &integerMemoryLimit
// Presumably the post-change assignment: writes the limit to Resources.MemoryMaxMB instead.
integerMemoryMaxLimit := int(limit)
defaultTask.Resources.MemoryMaxMB = &integerMemoryMaxLimit
}
func (n *NomadEnvironment) Image() string {

View File

@ -42,7 +42,7 @@ job "template-0" {
resources {
cpu = 40
memory = 40
memory = 16
}
restart {

View File

@ -24,11 +24,12 @@ import (
*/
// Package-level state shared by the e2e tests.
// NOTE(review): the names below appear twice because this listing looks like a rendered
// diff without +/- markers (old block followed by the re-aligned new block) - confirm
// against the commit before editing.
var (
log = logging.GetLogger("e2e")
testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests")
nomadClient *nomadApi.Client
nomadNamespace string
environmentIDs []dto.EnvironmentID
// log is the logger registered under the name "e2e".
log = logging.GetLogger("e2e")
// testDockerImage holds the value of the -dockerImage command line flag.
testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests")
nomadClient *nomadApi.Client
nomadNamespace string
environmentIDs []dto.EnvironmentID
// defaultNomadEnvironment is the request that createDefaultEnvironment sends; tests
// read it to learn the limits the default environment was created with.
defaultNomadEnvironment dto.ExecutionEnvironmentRequest
)
type E2ETestSuite struct {
@ -102,7 +103,7 @@ func createDefaultEnvironment() {
path := helpers.BuildURL(api.BasePath, api.EnvironmentsPath, tests.DefaultEnvironmentIDAsString)
request := dto.ExecutionEnvironmentRequest{
defaultNomadEnvironment = dto.ExecutionEnvironmentRequest{
PrewarmingPoolSize: 10,
CPULimit: 100,
MemoryLimit: 100,
@ -111,7 +112,7 @@ func createDefaultEnvironment() {
ExposedPorts: nil,
}
resp, err := helpers.HTTPPutJSON(path, request)
resp, err := helpers.HTTPPutJSON(path, defaultNomadEnvironment)
if err != nil || resp.StatusCode != http.StatusCreated && resp.StatusCode != http.StatusNoContent {
log.WithError(err).Fatal("Couldn't create default environment for e2e tests")
}

View File

@ -364,7 +364,7 @@ func validateJob(t *testing.T, expected dto.ExecutionEnvironmentRequest) {
task := taskGroup.Tasks[0]
assertEqualValueIntPointer(t, int(expected.CPULimit), task.Resources.CPU)
assertEqualValueIntPointer(t, int(expected.MemoryLimit), task.Resources.MemoryMB)
assertEqualValueIntPointer(t, int(expected.MemoryLimit), task.Resources.MemoryMaxMB)
assert.Equal(t, expected.Image, task.Config["image"])
if expected.NetworkAccess {

View File

@ -13,6 +13,7 @@ import (
"github.com/openHPI/poseidon/tests/helpers"
"github.com/stretchr/testify/suite"
"net/http"
"strconv"
"strings"
"time"
)
@ -175,6 +176,28 @@ func (s *E2ETestSuite) TestEchoEnvironment() {
}
}
// TestMemoryMaxLimit_Nomad verifies that a command which allocates more memory than the
// default environment's memory limit is killed: the execution must produce no stdout,
// report "Killed" on stderr and close the WebSocket connection normally.
func (s *E2ETestSuite) TestMemoryMaxLimit_Nomad() {
	// The operating system is responsible for killing the process and sometimes tolerates
	// the limit being exceeded slightly, so allocate 10% more than the configured limit.
	maxMemoryLimit := uint(1.1 * float64(defaultNomadEnvironment.MemoryLimit))

	connection, err := ProvideWebSocketConnection(&s.Suite, tests.DefaultEnvironmentIDAsInteger, &dto.ExecutionRequest{
		// This shell line tries to load maxMemoryLimit MB (note the unit suffix passed to head) into memory.
		Command: "</dev/zero head -c " + strconv.Itoa(int(maxMemoryLimit)) + "MB | tail > /dev/null",
	})
	s.Require().NoError(err)

	startMessage, err := helpers.ReceiveNextWebSocketMessage(connection)
	s.Require().NoError(err)
	s.Equal(dto.WebSocketMetaStart, startMessage.Type)

	// Reading until the connection ends is expected to finish with a normal close error.
	messages, err := helpers.ReceiveAllWebSocketMessages(connection)
	s.Require().Error(err)
	// testify expects (expected, actual); keeping this order makes failure output readable.
	s.Equal(&websocket.CloseError{Code: websocket.CloseNormalClosure}, err)

	stdout, stderr, _ := helpers.WebSocketOutputMessages(messages)
	s.Empty(stdout)
	s.Contains(stderr, "Killed")
}
func (s *E2ETestSuite) TestNomadStderrFifoIsRemoved() {
runnerID, err := ProvideRunner(&dto.RunnerRequest{
ExecutionEnvironmentID: tests.DefaultEnvironmentIDAsInteger,