Restructure project

We previously didn't really have any structure in our project apart
from creating a new folder for each package in the project root.
Now that we have accumulated some packages, we adopt the well-known
Go project layout in order to clearly communicate the intent of
our packages. See https://github.com/golang-standards/project-layout
sirkrypt0
2021-07-16 09:19:42 +02:00
parent 2f1383b743
commit 8b26ecbe5f
66 changed files with 95 additions and 95 deletions
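To illustrate the new layout, here is a minimal sketch of an entry point wired against the restructured packages. The cmd/poseidon path, the port, and the nil managers are assumptions for illustration only; the actual main package is not part of the excerpt shown below.

// cmd/poseidon/main.go (hypothetical entry point illustrating the new import paths;
// not part of this commit excerpt)
package main

import (
	"log"
	"net/http"

	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/api"
)

func main() {
	// internal/... holds application-private packages, pkg/... holds reusable library code.
	// Real runner and environment managers would be wired in here; nil is used only for this sketch.
	router := api.NewRouter(nil, nil)
	log.Fatal(http.ListenAndServe(":7200", router))
}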

internal/api/api.go (new file, 57 lines)

@ -0,0 +1,57 @@
package api
import (
"github.com/gorilla/mux"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/api/auth"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/environment"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/logging"
"net/http"
)
var log = logging.GetLogger("api")
const (
BasePath = "/api/v1"
HealthPath = "/health"
RunnersPath = "/runners"
EnvironmentsPath = "/execution-environments"
)
// NewRouter returns a *mux.Router which can be
// used by the net/http package to serve the routes of our API. It
// always returns a router for the newest version of our API. We
// use gorilla/mux because it is more convenient than net/http, e.g.
// when extracting path parameters.
func NewRouter(runnerManager runner.Manager, environmentManager environment.Manager) *mux.Router {
router := mux.NewRouter()
// this can later be restricted to a specific host with
// `router.Host(...)` and to HTTPS with `router.Schemes("https")`
configureV1Router(router, runnerManager, environmentManager)
router.Use(logging.HTTPLoggingMiddleware)
return router
}
// configureV1Router configures a given router with the routes of version 1 of the Poseidon API.
func configureV1Router(router *mux.Router, runnerManager runner.Manager, environmentManager environment.Manager) {
v1 := router.PathPrefix(BasePath).Subrouter()
v1.HandleFunc(HealthPath, Health).Methods(http.MethodGet)
runnerController := &RunnerController{manager: runnerManager}
environmentController := &EnvironmentController{manager: environmentManager}
configureRoutes := func(router *mux.Router) {
runnerController.ConfigureRoutes(router)
environmentController.ConfigureRoutes(router)
}
if auth.InitializeAuthentication() {
// Create new authenticated subrouter.
// All routes added to v1 after this require authentication.
authenticatedV1Router := v1.PathPrefix("").Subrouter()
authenticatedV1Router.Use(auth.HTTPAuthenticationMiddleware)
runnerController.ConfigureRoutes(authenticatedV1Router)
} else {
configureRoutes(v1)
}
}
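As a sketch of the restriction hinted at in NewRouter's comment (not part of this commit; the host name is a placeholder and the scheme restriction is an assumption):

// newRestrictedRouter sketches how the host and scheme restriction mentioned in
// NewRouter could look. "poseidon.example.com" is a placeholder.
func newRestrictedRouter(runnerManager runner.Manager, environmentManager environment.Manager) *mux.Router {
	router := mux.NewRouter()
	restricted := router.Host("poseidon.example.com").Schemes("https").Subrouter()
	configureV1Router(restricted, runnerManager, environmentManager)
	restricted.Use(logging.HTTPLoggingMiddleware)
	return router
}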

internal/api/api_test.go (new file, 70 lines)

@ -0,0 +1,70 @@
package api
import (
"github.com/gorilla/mux"
"github.com/stretchr/testify/assert"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
"net/http"
"net/http/httptest"
"testing"
)
func mockHTTPHandler(writer http.ResponseWriter, _ *http.Request) {
writer.WriteHeader(http.StatusOK)
}
func TestNewRouterV1WithAuthenticationDisabled(t *testing.T) {
config.Config.Server.Token = ""
router := mux.NewRouter()
configureV1Router(router, nil, nil)
t.Run("health route is accessible", func(t *testing.T) {
request, err := http.NewRequest(http.MethodGet, "/api/v1/health", nil)
if err != nil {
t.Fatal(err)
}
recorder := httptest.NewRecorder()
router.ServeHTTP(recorder, request)
assert.Equal(t, http.StatusNoContent, recorder.Code)
})
t.Run("added route is accessible", func(t *testing.T) {
router.HandleFunc("/api/v1/test", mockHTTPHandler)
request, err := http.NewRequest(http.MethodGet, "/api/v1/test", nil)
if err != nil {
t.Fatal(err)
}
recorder := httptest.NewRecorder()
router.ServeHTTP(recorder, request)
assert.Equal(t, http.StatusOK, recorder.Code)
})
}
func TestNewRouterV1WithAuthenticationEnabled(t *testing.T) {
config.Config.Server.Token = "TestToken"
router := mux.NewRouter()
configureV1Router(router, nil, nil)
t.Run("health route is accessible", func(t *testing.T) {
request, err := http.NewRequest(http.MethodGet, "/api/v1/health", nil)
if err != nil {
t.Fatal(err)
}
recorder := httptest.NewRecorder()
router.ServeHTTP(recorder, request)
assert.Equal(t, http.StatusNoContent, recorder.Code)
})
t.Run("protected route is not accessible", func(t *testing.T) {
// request an available API route that should be guarded by authentication.
// (which one, in particular, does not matter here)
request, err := http.NewRequest(http.MethodPost, "/api/v1/runners", nil)
if err != nil {
t.Fatal(err)
}
recorder := httptest.NewRecorder()
router.ServeHTTP(recorder, request)
assert.Equal(t, http.StatusUnauthorized, recorder.Code)
})
config.Config.Server.Token = ""
}

internal/api/auth/auth.go (new file, 36 lines)

@ -0,0 +1,36 @@
package auth
import (
"crypto/subtle"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/logging"
"net/http"
)
var log = logging.GetLogger("api/auth")
const TokenHeader = "X-Poseidon-Token"
var correctAuthenticationToken []byte
// InitializeAuthentication returns true iff the authentication is initialized successfully and can be used.
func InitializeAuthentication() bool {
token := config.Config.Server.Token
if token == "" {
return false
}
correctAuthenticationToken = []byte(token)
return true
}
func HTTPAuthenticationMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
token := r.Header.Get(TokenHeader)
if subtle.ConstantTimeCompare([]byte(token), correctAuthenticationToken) == 0 {
log.WithField("token", token).Warn("Incorrect token")
w.WriteHeader(http.StatusUnauthorized)
return
}
next.ServeHTTP(w, r)
})
}
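For reference, clients authenticate by sending the configured token in the X-Poseidon-Token header. A minimal client sketch (not part of this commit; host, port, and token value are placeholders, and response handling is omitted):

// doAuthenticatedRequest sketches a client call against a protected route.
// The token must match config.Config.Server.Token; the middleware compares it in constant time.
func doAuthenticatedRequest() (*http.Response, error) {
	request, err := http.NewRequest(http.MethodPost, "http://localhost:7200/api/v1/runners", nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set(TokenHeader, "the-configured-token")
	return http.DefaultClient.Do(request)
}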

internal/api/auth/auth_test.go (new file, 91 lines)

@ -0,0 +1,91 @@
package auth
import (
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
"net/http"
"net/http/httptest"
"testing"
)
const testToken = "C0rr3ctT0k3n"
type AuthenticationMiddlewareTestSuite struct {
suite.Suite
request *http.Request
recorder *httptest.ResponseRecorder
httpAuthenticationMiddleware http.Handler
}
func (s *AuthenticationMiddlewareTestSuite) SetupTest() {
correctAuthenticationToken = []byte(testToken)
s.recorder = httptest.NewRecorder()
request, err := http.NewRequest(http.MethodGet, "/api/v1/test", nil)
if err != nil {
s.T().Fatal(err)
}
s.request = request
s.httpAuthenticationMiddleware = HTTPAuthenticationMiddleware(
http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
}))
}
func (s *AuthenticationMiddlewareTestSuite) TearDownTest() {
correctAuthenticationToken = []byte(nil)
}
func (s *AuthenticationMiddlewareTestSuite) TestReturns401WhenHeaderUnset() {
s.httpAuthenticationMiddleware.ServeHTTP(s.recorder, s.request)
assert.Equal(s.T(), http.StatusUnauthorized, s.recorder.Code)
}
func (s *AuthenticationMiddlewareTestSuite) TestReturns401WhenTokenWrong() {
s.request.Header.Set(TokenHeader, "Wr0ngT0k3n")
s.httpAuthenticationMiddleware.ServeHTTP(s.recorder, s.request)
assert.Equal(s.T(), http.StatusUnauthorized, s.recorder.Code)
}
func (s *AuthenticationMiddlewareTestSuite) TestWarnsWhenUnauthorized() {
var hook *test.Hook
logger, hook := test.NewNullLogger()
log = logger.WithField("pkg", "api/auth")
s.request.Header.Set(TokenHeader, "Wr0ngT0k3n")
s.httpAuthenticationMiddleware.ServeHTTP(s.recorder, s.request)
assert.Equal(s.T(), http.StatusUnauthorized, s.recorder.Code)
assert.Equal(s.T(), logrus.WarnLevel, hook.LastEntry().Level)
assert.Equal(s.T(), hook.LastEntry().Data["token"], "Wr0ngT0k3n")
}
func (s *AuthenticationMiddlewareTestSuite) TestPassesWhenTokenCorrect() {
s.request.Header.Set(TokenHeader, testToken)
s.httpAuthenticationMiddleware.ServeHTTP(s.recorder, s.request)
assert.Equal(s.T(), http.StatusOK, s.recorder.Code)
}
func TestHTTPAuthenticationMiddleware(t *testing.T) {
suite.Run(t, new(AuthenticationMiddlewareTestSuite))
}
func TestInitializeAuthentication(t *testing.T) {
t.Run("if token unset", func(t *testing.T) {
config.Config.Server.Token = ""
initialized := InitializeAuthentication()
assert.Equal(t, false, initialized)
assert.Equal(t, []byte(nil), correctAuthenticationToken, "it should not set correctAuthenticationToken")
})
t.Run("if token set", func(t *testing.T) {
config.Config.Server.Token = testToken
initialized := InitializeAuthentication()
assert.Equal(t, true, initialized)
assert.Equal(t, []byte(testToken), correctAuthenticationToken, "it should set correctAuthenticationToken")
config.Config.Server.Token = ""
correctAuthenticationToken = []byte(nil)
})
}

internal/api/environments.go (new file, 59 lines)

@ -0,0 +1,59 @@
package api
import (
"encoding/json"
"errors"
"fmt"
"github.com/gorilla/mux"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/environment"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"net/http"
)
const (
executionEnvironmentIDKey = "executionEnvironmentId"
createOrUpdateRouteName = "createOrUpdate"
)
var ErrMissingURLParameter = errors.New("url parameter missing")
type EnvironmentController struct {
manager environment.Manager
}
func (e *EnvironmentController) ConfigureRoutes(router *mux.Router) {
environmentRouter := router.PathPrefix(EnvironmentsPath).Subrouter()
specificEnvironmentRouter := environmentRouter.Path(fmt.Sprintf("/{%s:[0-9]+}", executionEnvironmentIDKey)).Subrouter()
specificEnvironmentRouter.HandleFunc("", e.createOrUpdate).Methods(http.MethodPut).Name(createOrUpdateRouteName)
}
// createOrUpdate creates/updates an execution environment on the executor.
func (e *EnvironmentController) createOrUpdate(writer http.ResponseWriter, request *http.Request) {
req := new(dto.ExecutionEnvironmentRequest)
if err := json.NewDecoder(request.Body).Decode(req); err != nil {
writeBadRequest(writer, err)
return
}
id, ok := mux.Vars(request)[executionEnvironmentIDKey]
if !ok {
writeBadRequest(writer, fmt.Errorf("could not find %s: %w", executionEnvironmentIDKey, ErrMissingURLParameter))
return
}
environmentID, err := runner.NewEnvironmentID(id)
if err != nil {
writeBadRequest(writer, fmt.Errorf("could not update environment: %w", err))
return
}
created, err := e.manager.CreateOrUpdate(environmentID, *req)
if err != nil {
writeInternalServerError(writer, err, dto.ErrorUnknown)
return
}
if created {
writer.WriteHeader(http.StatusCreated)
} else {
writer.WriteHeader(http.StatusNoContent)
}
}
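A sketch of calling this route from a client (not part of this commit; it assumes an io import in addition to net/http, and the URL plus environment id are placeholders):

// putEnvironment sketches a client-side create-or-update call. A 201 response means the
// environment was newly created, a 204 response means an existing one was updated.
func putEnvironment(body io.Reader) (created bool, err error) {
	request, err := http.NewRequest(http.MethodPut,
		"http://localhost:7200/api/v1/execution-environments/42", body)
	if err != nil {
		return false, err
	}
	response, err := http.DefaultClient.Do(request)
	if err != nil {
		return false, err
	}
	defer func() { _ = response.Body.Close() }()
	return response.StatusCode == http.StatusCreated, nil
}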

internal/api/environments_test.go (new file, 117 lines)

@ -0,0 +1,117 @@
package api
import (
"bytes"
"encoding/json"
"github.com/gorilla/mux"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/environment"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"math"
"net/http"
"net/http/httptest"
"strconv"
"strings"
"testing"
)
type EnvironmentControllerTestSuite struct {
suite.Suite
manager *environment.ManagerMock
router *mux.Router
}
func TestEnvironmentControllerTestSuite(t *testing.T) {
suite.Run(t, new(EnvironmentControllerTestSuite))
}
func (s *EnvironmentControllerTestSuite) SetupTest() {
s.manager = &environment.ManagerMock{}
s.router = NewRouter(nil, s.manager)
}
type CreateOrUpdateEnvironmentTestSuite struct {
EnvironmentControllerTestSuite
path string
id runner.EnvironmentID
body []byte
}
func TestCreateOrUpdateEnvironmentTestSuite(t *testing.T) {
suite.Run(t, new(CreateOrUpdateEnvironmentTestSuite))
}
func (s *CreateOrUpdateEnvironmentTestSuite) SetupTest() {
s.EnvironmentControllerTestSuite.SetupTest()
s.id = tests.DefaultEnvironmentIDAsInteger
testURL, err := s.router.Get(createOrUpdateRouteName).URL(executionEnvironmentIDKey, strconv.Itoa(int(s.id)))
if err != nil {
s.T().Fatal(err)
}
s.path = testURL.String()
s.body, err = json.Marshal(dto.ExecutionEnvironmentRequest{})
if err != nil {
s.T().Fatal(err)
}
}
func (s *CreateOrUpdateEnvironmentTestSuite) recordRequest() *httptest.ResponseRecorder {
recorder := httptest.NewRecorder()
request, err := http.NewRequest(http.MethodPut, s.path, bytes.NewReader(s.body))
if err != nil {
s.T().Fatal(err)
}
s.router.ServeHTTP(recorder, request)
return recorder
}
func (s *CreateOrUpdateEnvironmentTestSuite) TestReturnsBadRequestWhenBadBody() {
s.body = []byte{}
recorder := s.recordRequest()
s.Equal(http.StatusBadRequest, recorder.Code)
}
func (s *CreateOrUpdateEnvironmentTestSuite) TestReturnsInternalServerErrorWhenManagerReturnsError() {
testError := tests.ErrDefault
s.manager.
On("CreateOrUpdate", s.id, mock.AnythingOfType("dto.ExecutionEnvironmentRequest")).
Return(false, testError)
recorder := s.recordRequest()
s.Equal(http.StatusInternalServerError, recorder.Code)
s.Contains(recorder.Body.String(), testError.Error())
}
func (s *CreateOrUpdateEnvironmentTestSuite) TestReturnsCreatedIfNewEnvironment() {
s.manager.
On("CreateOrUpdate", s.id, mock.AnythingOfType("dto.ExecutionEnvironmentRequest")).
Return(true, nil)
recorder := s.recordRequest()
s.Equal(http.StatusCreated, recorder.Code)
}
func (s *CreateOrUpdateEnvironmentTestSuite) TestReturnsNoContentIfNotNewEnvironment() {
s.manager.
On("CreateOrUpdate", s.id, mock.AnythingOfType("dto.ExecutionEnvironmentRequest")).
Return(false, nil)
recorder := s.recordRequest()
s.Equal(http.StatusNoContent, recorder.Code)
}
func (s *CreateOrUpdateEnvironmentTestSuite) TestReturnsNotFoundOnNonIntegerID() {
s.path = strings.Join([]string{BasePath, EnvironmentsPath, "/", "invalid-id"}, "")
recorder := s.recordRequest()
s.Equal(http.StatusNotFound, recorder.Code)
}
func (s *CreateOrUpdateEnvironmentTestSuite) TestFailsOnTooLargeID() {
tooLargeIntStr := strconv.Itoa(math.MaxInt64) + "0"
s.path = strings.Join([]string{BasePath, EnvironmentsPath, "/", tooLargeIntStr}, "")
recorder := s.recordRequest()
s.Equal(http.StatusBadRequest, recorder.Code)
}

internal/api/health.go (new file, 12 lines)

@ -0,0 +1,12 @@
package api
import (
"net/http"
)
// Health handles the health route.
// It responds that the server is alive.
// If it is not, the response won't reach the client.
func Health(writer http.ResponseWriter, _ *http.Request) {
writer.WriteHeader(http.StatusNoContent)
}

internal/api/health_test.go (new file, 18 lines)

@ -0,0 +1,18 @@
package api
import (
"github.com/stretchr/testify/assert"
"net/http"
"net/http/httptest"
"testing"
)
func TestHealthRoute(t *testing.T) {
request, err := http.NewRequest(http.MethodGet, "/health", nil)
if err != nil {
t.Fatal(err)
}
recorder := httptest.NewRecorder()
http.HandlerFunc(Health).ServeHTTP(recorder, request)
assert.Equal(t, http.StatusNoContent, recorder.Code)
}

internal/api/helpers.go (new file, 43 lines)

@ -0,0 +1,43 @@
package api
import (
"encoding/json"
"fmt"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"net/http"
)
func writeInternalServerError(writer http.ResponseWriter, err error, errorCode dto.ErrorCode) {
sendJSON(writer, &dto.InternalServerError{Message: err.Error(), ErrorCode: errorCode}, http.StatusInternalServerError)
}
func writeBadRequest(writer http.ResponseWriter, err error) {
sendJSON(writer, &dto.ClientError{Message: err.Error()}, http.StatusBadRequest)
}
func writeNotFound(writer http.ResponseWriter, err error) {
sendJSON(writer, &dto.ClientError{Message: err.Error()}, http.StatusNotFound)
}
func sendJSON(writer http.ResponseWriter, content interface{}, httpStatusCode int) {
writer.Header().Set("Content-Type", "application/json")
writer.WriteHeader(httpStatusCode)
response, err := json.Marshal(content)
if err != nil {
// cannot produce infinite recursive loop, since json.Marshal of dto.InternalServerError won't return an error
writeInternalServerError(writer, err, dto.ErrorUnknown)
return
}
if _, err = writer.Write(response); err != nil {
log.WithError(err).Error("Could not write JSON response")
http.Error(writer, err.Error(), http.StatusInternalServerError)
}
}
func parseJSONRequestBody(writer http.ResponseWriter, request *http.Request, structure interface{}) error {
if err := json.NewDecoder(request.Body).Decode(structure); err != nil {
writeBadRequest(writer, err)
return fmt.Errorf("error parsing JSON request body: %w", err)
}
return nil
}
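To show how these helpers are meant to be combined, a small hypothetical handler (not part of this commit; the request type and the response payload are made up for illustration):

// exampleRequest is a made-up payload type used only for this sketch.
type exampleRequest struct {
	Name string `json:"name"`
}

// exampleHandler sketches the intended usage of parseJSONRequestBody and sendJSON.
func exampleHandler(writer http.ResponseWriter, request *http.Request) {
	req := new(exampleRequest)
	if err := parseJSONRequestBody(writer, request, req); err != nil {
		// parseJSONRequestBody has already written the 400 response, so we only stop here.
		return
	}
	sendJSON(writer, map[string]string{"greeting": "Hello " + req.Name}, http.StatusOK)
}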

internal/api/runners.go (new file, 159 lines)

@ -0,0 +1,159 @@
package api
import (
"errors"
"fmt"
"github.com/google/uuid"
"github.com/gorilla/mux"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"net/http"
"net/url"
)
const (
ExecutePath = "/execute"
WebsocketPath = "/websocket"
UpdateFileSystemPath = "/files"
DeleteRoute = "deleteRunner"
RunnerIDKey = "runnerId"
ExecutionIDKey = "executionID"
ProvideRoute = "provideRunner"
)
type RunnerController struct {
manager runner.Manager
runnerRouter *mux.Router
}
// ConfigureRoutes configures a given router with the runner routes of our API.
func (r *RunnerController) ConfigureRoutes(router *mux.Router) {
runnersRouter := router.PathPrefix(RunnersPath).Subrouter()
runnersRouter.HandleFunc("", r.provide).Methods(http.MethodPost).Name(ProvideRoute)
r.runnerRouter = runnersRouter.PathPrefix(fmt.Sprintf("/{%s}", RunnerIDKey)).Subrouter()
r.runnerRouter.Use(r.findRunnerMiddleware)
r.runnerRouter.HandleFunc(UpdateFileSystemPath, r.updateFileSystem).Methods(http.MethodPatch).
Name(UpdateFileSystemPath)
r.runnerRouter.HandleFunc(ExecutePath, r.execute).Methods(http.MethodPost).Name(ExecutePath)
r.runnerRouter.HandleFunc(WebsocketPath, r.connectToRunner).Methods(http.MethodGet).Name(WebsocketPath)
r.runnerRouter.HandleFunc("", r.delete).Methods(http.MethodDelete).Name(DeleteRoute)
}
// provide handles the provide runners API route.
// It tries to respond with the ID of an unused runner.
// This runner is then reserved for future use.
func (r *RunnerController) provide(writer http.ResponseWriter, request *http.Request) {
runnerRequest := new(dto.RunnerRequest)
if err := parseJSONRequestBody(writer, request, runnerRequest); err != nil {
return
}
environmentID := runner.EnvironmentID(runnerRequest.ExecutionEnvironmentID)
nextRunner, err := r.manager.Claim(environmentID, runnerRequest.InactivityTimeout)
if err != nil {
switch err {
case runner.ErrUnknownExecutionEnvironment:
writeNotFound(writer, err)
case runner.ErrNoRunnersAvailable:
log.WithField("environment", environmentID).Warn("No runners available")
writeInternalServerError(writer, err, dto.ErrorNomadOverload)
default:
writeInternalServerError(writer, err, dto.ErrorUnknown)
}
return
}
sendJSON(writer, &dto.RunnerResponse{ID: nextRunner.ID(), MappedPorts: nextRunner.MappedPorts()}, http.StatusOK)
}
// updateFileSystem handles the files API route.
// It takes a dto.UpdateFileSystemRequest and sends it to the runner for processing.
func (r *RunnerController) updateFileSystem(writer http.ResponseWriter, request *http.Request) {
fileCopyRequest := new(dto.UpdateFileSystemRequest)
if err := parseJSONRequestBody(writer, request, fileCopyRequest); err != nil {
return
}
targetRunner, _ := runner.FromContext(request.Context())
if err := targetRunner.UpdateFileSystem(fileCopyRequest); err != nil {
log.WithError(err).Error("Could not perform the requested updateFileSystem.")
writeInternalServerError(writer, err, dto.ErrorUnknown)
return
}
writer.WriteHeader(http.StatusNoContent)
}
// execute handles the execute API route.
// It takes an ExecutionRequest and stores it for a runner.
// It returns a URL that can be used to establish a WebSocket connection for this execution on the corresponding runner.
func (r *RunnerController) execute(writer http.ResponseWriter, request *http.Request) {
executionRequest := new(dto.ExecutionRequest)
if err := parseJSONRequestBody(writer, request, executionRequest); err != nil {
return
}
var scheme string
if config.Config.Server.TLS {
scheme = "wss"
} else {
scheme = "ws"
}
targetRunner, _ := runner.FromContext(request.Context())
path, err := r.runnerRouter.Get(WebsocketPath).URL(RunnerIDKey, targetRunner.ID())
if err != nil {
log.WithError(err).Error("Could not create runner websocket URL.")
writeInternalServerError(writer, err, dto.ErrorUnknown)
return
}
newUUID, err := uuid.NewRandom()
if err != nil {
log.WithError(err).Error("Could not create execution id")
writeInternalServerError(writer, err, dto.ErrorUnknown)
return
}
id := runner.ExecutionID(newUUID.String())
targetRunner.Add(id, executionRequest)
webSocketURL := url.URL{
Scheme: scheme,
Host: request.Host,
Path: path.String(),
RawQuery: fmt.Sprintf("%s=%s", ExecutionIDKey, id),
}
sendJSON(writer, &dto.ExecutionResponse{WebSocketURL: webSocketURL.String()}, http.StatusOK)
}
// The findRunnerMiddleware looks up the runnerId for routes containing it
// and adds the runner to the context of the request.
func (r *RunnerController) findRunnerMiddleware(next http.Handler) http.Handler {
return http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) {
runnerID := mux.Vars(request)[RunnerIDKey]
targetRunner, err := r.manager.Get(runnerID)
if err != nil {
writeNotFound(writer, err)
return
}
ctx := runner.NewContext(request.Context(), targetRunner)
requestWithRunner := request.WithContext(ctx)
next.ServeHTTP(writer, requestWithRunner)
})
}
// delete handles the delete runner API route.
// It destroys the given runner on the executor and removes it from the used runners list.
func (r *RunnerController) delete(writer http.ResponseWriter, request *http.Request) {
targetRunner, _ := runner.FromContext(request.Context())
err := r.manager.Return(targetRunner)
if err != nil {
if errors.Is(err, runner.ErrUnknownExecutionEnvironment) {
writeNotFound(writer, err)
} else {
writeInternalServerError(writer, err, dto.ErrorNomadInternalServerError)
}
return
}
writer.WriteHeader(http.StatusNoContent)
}
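Taken together, these routes are meant to be used in a claim, execute, connect sequence. A compressed client sketch (not part of this commit; host, port, environment id, and command are placeholders, authentication and thorough error handling are omitted, and the bytes and encoding/json imports are assumed):

// sketchRunnerFlow claims a runner, registers an execution, and returns the WebSocket URL
// that streams the execution's I/O (see websocket.go).
func sketchRunnerFlow() (string, error) {
	const base = "http://localhost:7200/api/v1"
	// 1. Claim a runner for an execution environment.
	claimBody, err := json.Marshal(dto.RunnerRequest{ExecutionEnvironmentID: 0})
	if err != nil {
		return "", err
	}
	claimResponse, err := http.Post(base+RunnersPath, "application/json", bytes.NewReader(claimBody))
	if err != nil {
		return "", err
	}
	defer func() { _ = claimResponse.Body.Close() }()
	var runnerResponse dto.RunnerResponse
	if err := json.NewDecoder(claimResponse.Body).Decode(&runnerResponse); err != nil {
		return "", err
	}
	// 2. Register an execution on the claimed runner.
	executeBody, err := json.Marshal(dto.ExecutionRequest{Command: "ls", TimeLimit: 10})
	if err != nil {
		return "", err
	}
	executeResponse, err := http.Post(base+RunnersPath+"/"+runnerResponse.ID+ExecutePath,
		"application/json", bytes.NewReader(executeBody))
	if err != nil {
		return "", err
	}
	defer func() { _ = executeResponse.Body.Close() }()
	var executionResponse dto.ExecutionResponse
	if err := json.NewDecoder(executeResponse.Body).Decode(&executionResponse); err != nil {
		return "", err
	}
	// 3. Connect to the returned URL with a WebSocket client to stream stdin/stdout/stderr.
	return executionResponse.WebSocketURL, nil
}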

internal/api/runners_test.go (new file, 351 lines)

@ -0,0 +1,351 @@
package api
import (
"bytes"
"encoding/json"
"fmt"
"github.com/gorilla/mux"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
)
type MiddlewareTestSuite struct {
suite.Suite
manager *runner.ManagerMock
router *mux.Router
runner runner.Runner
capturedRunner runner.Runner
runnerRequest func(string) *http.Request
}
func (s *MiddlewareTestSuite) SetupTest() {
s.manager = &runner.ManagerMock{}
s.runner = runner.NewNomadJob(tests.DefaultRunnerID, nil, nil, nil)
s.capturedRunner = nil
s.runnerRequest = func(runnerId string) *http.Request {
path, err := s.router.Get("test-runner-id").URL(RunnerIDKey, runnerId)
s.Require().NoError(err)
request, err := http.NewRequest(http.MethodPost, path.String(), nil)
s.Require().NoError(err)
return request
}
runnerRouteHandler := func(writer http.ResponseWriter, request *http.Request) {
var ok bool
s.capturedRunner, ok = runner.FromContext(request.Context())
if ok {
writer.WriteHeader(http.StatusOK)
} else {
writer.WriteHeader(http.StatusInternalServerError)
}
}
s.router = mux.NewRouter()
runnerController := &RunnerController{s.manager, s.router}
s.router.Use(runnerController.findRunnerMiddleware)
s.router.HandleFunc(fmt.Sprintf("/test/{%s}", RunnerIDKey), runnerRouteHandler).Name("test-runner-id")
}
func TestMiddlewareTestSuite(t *testing.T) {
suite.Run(t, new(MiddlewareTestSuite))
}
func (s *MiddlewareTestSuite) TestFindRunnerMiddlewareIfRunnerExists() {
s.manager.On("Get", s.runner.ID()).Return(s.runner, nil)
recorder := httptest.NewRecorder()
s.router.ServeHTTP(recorder, s.runnerRequest(s.runner.ID()))
s.Equal(http.StatusOK, recorder.Code)
s.Equal(s.runner, s.capturedRunner)
}
func (s *MiddlewareTestSuite) TestFindRunnerMiddlewareIfRunnerDoesNotExist() {
invalidID := "some-invalid-runner-id"
s.manager.On("Get", invalidID).Return(nil, runner.ErrRunnerNotFound)
recorder := httptest.NewRecorder()
s.router.ServeHTTP(recorder, s.runnerRequest(invalidID))
s.Equal(http.StatusNotFound, recorder.Code)
}
func TestRunnerRouteTestSuite(t *testing.T) {
suite.Run(t, new(RunnerRouteTestSuite))
}
type RunnerRouteTestSuite struct {
suite.Suite
runnerManager *runner.ManagerMock
router *mux.Router
runner runner.Runner
executionID runner.ExecutionID
}
func (s *RunnerRouteTestSuite) SetupTest() {
s.runnerManager = &runner.ManagerMock{}
s.router = NewRouter(s.runnerManager, nil)
s.runner = runner.NewNomadJob("some-id", nil, nil, nil)
s.executionID = "execution-id"
s.runner.Add(s.executionID, &dto.ExecutionRequest{})
s.runnerManager.On("Get", s.runner.ID()).Return(s.runner, nil)
}
func TestProvideRunnerTestSuite(t *testing.T) {
suite.Run(t, new(ProvideRunnerTestSuite))
}
type ProvideRunnerTestSuite struct {
RunnerRouteTestSuite
defaultRequest *http.Request
path string
}
func (s *ProvideRunnerTestSuite) SetupTest() {
s.RunnerRouteTestSuite.SetupTest()
path, err := s.router.Get(ProvideRoute).URL()
s.Require().NoError(err)
s.path = path.String()
runnerRequest := dto.RunnerRequest{ExecutionEnvironmentID: tests.DefaultEnvironmentIDAsInteger}
body, err := json.Marshal(runnerRequest)
s.Require().NoError(err)
s.defaultRequest, err = http.NewRequest(http.MethodPost, s.path, bytes.NewReader(body))
s.Require().NoError(err)
}
func (s *ProvideRunnerTestSuite) TestValidRequestReturnsRunner() {
s.runnerManager.On("Claim", mock.AnythingOfType("runner.EnvironmentID"),
mock.AnythingOfType("int")).Return(s.runner, nil)
recorder := httptest.NewRecorder()
s.router.ServeHTTP(recorder, s.defaultRequest)
s.Equal(http.StatusOK, recorder.Code)
s.Run("response contains runnerId", func() {
var runnerResponse dto.RunnerResponse
err := json.NewDecoder(recorder.Result().Body).Decode(&runnerResponse)
s.Require().NoError(err)
_ = recorder.Result().Body.Close()
s.Equal(s.runner.ID(), runnerResponse.ID)
})
}
func (s *ProvideRunnerTestSuite) TestInvalidRequestReturnsBadRequest() {
badRequest, err := http.NewRequest(http.MethodPost, s.path, strings.NewReader(""))
s.Require().NoError(err)
recorder := httptest.NewRecorder()
s.router.ServeHTTP(recorder, badRequest)
s.Equal(http.StatusBadRequest, recorder.Code)
}
func (s *ProvideRunnerTestSuite) TestWhenExecutionEnvironmentDoesNotExistReturnsNotFound() {
s.runnerManager.
On("Claim", mock.AnythingOfType("runner.EnvironmentID"), mock.AnythingOfType("int")).
Return(nil, runner.ErrUnknownExecutionEnvironment)
recorder := httptest.NewRecorder()
s.router.ServeHTTP(recorder, s.defaultRequest)
s.Equal(http.StatusNotFound, recorder.Code)
}
func (s *ProvideRunnerTestSuite) TestWhenNoRunnerAvailableReturnsNomadOverload() {
s.runnerManager.On("Claim", mock.AnythingOfType("runner.EnvironmentID"), mock.AnythingOfType("int")).
Return(nil, runner.ErrNoRunnersAvailable)
recorder := httptest.NewRecorder()
s.router.ServeHTTP(recorder, s.defaultRequest)
s.Equal(http.StatusInternalServerError, recorder.Code)
var internalServerError dto.InternalServerError
err := json.NewDecoder(recorder.Result().Body).Decode(&internalServerError)
s.Require().NoError(err)
_ = recorder.Result().Body.Close()
s.Equal(dto.ErrorNomadOverload, internalServerError.ErrorCode)
}
func (s *RunnerRouteTestSuite) TestExecuteRoute() {
path, err := s.router.Get(ExecutePath).URL(RunnerIDKey, s.runner.ID())
s.Require().NoError(err)
s.Run("valid request", func() {
recorder := httptest.NewRecorder()
executionRequest := dto.ExecutionRequest{
Command: "command",
TimeLimit: 10,
Environment: nil,
}
body, err := json.Marshal(executionRequest)
s.Require().NoError(err)
request, err := http.NewRequest(http.MethodPost, path.String(), bytes.NewReader(body))
s.Require().NoError(err)
s.router.ServeHTTP(recorder, request)
var webSocketResponse dto.ExecutionResponse
err = json.NewDecoder(recorder.Result().Body).Decode(&webSocketResponse)
s.Require().NoError(err)
s.Equal(http.StatusOK, recorder.Code)
s.Run("creates an execution request for the runner", func() {
webSocketURL, err := url.Parse(webSocketResponse.WebSocketURL)
s.Require().NoError(err)
executionID := webSocketURL.Query().Get(ExecutionIDKey)
storedExecutionRequest, ok := s.runner.Pop(runner.ExecutionID(executionID))
s.True(ok, "No execution request with this id: ", executionID)
s.Equal(&executionRequest, storedExecutionRequest)
})
})
s.Run("invalid request", func() {
recorder := httptest.NewRecorder()
body := ""
request, err := http.NewRequest(http.MethodPost, path.String(), strings.NewReader(body))
s.Require().NoError(err)
s.router.ServeHTTP(recorder, request)
s.Equal(http.StatusBadRequest, recorder.Code)
})
}
func TestUpdateFileSystemRouteTestSuite(t *testing.T) {
suite.Run(t, new(UpdateFileSystemRouteTestSuite))
}
type UpdateFileSystemRouteTestSuite struct {
RunnerRouteTestSuite
path string
recorder *httptest.ResponseRecorder
runnerMock *runner.RunnerMock
}
func (s *UpdateFileSystemRouteTestSuite) SetupTest() {
s.RunnerRouteTestSuite.SetupTest()
routeURL, err := s.router.Get(UpdateFileSystemPath).URL(RunnerIDKey, tests.DefaultMockID)
s.Require().NoError(err)
s.path = routeURL.String()
s.runnerMock = &runner.RunnerMock{}
s.runnerManager.On("Get", tests.DefaultMockID).Return(s.runnerMock, nil)
s.recorder = httptest.NewRecorder()
}
func (s *UpdateFileSystemRouteTestSuite) TestUpdateFileSystemReturnsNoContentOnValidRequest() {
s.runnerMock.On("UpdateFileSystem", mock.AnythingOfType("*dto.UpdateFileSystemRequest")).Return(nil)
copyRequest := dto.UpdateFileSystemRequest{}
body, err := json.Marshal(copyRequest)
s.Require().NoError(err)
request, err := http.NewRequest(http.MethodPatch, s.path, bytes.NewReader(body))
s.Require().NoError(err)
s.router.ServeHTTP(s.recorder, request)
s.Equal(http.StatusNoContent, s.recorder.Code)
s.runnerMock.AssertCalled(s.T(), "UpdateFileSystem", mock.AnythingOfType("*dto.UpdateFileSystemRequest"))
}
func (s *UpdateFileSystemRouteTestSuite) TestUpdateFileSystemReturnsBadRequestOnInvalidRequestBody() {
request, err := http.NewRequest(http.MethodPatch, s.path, strings.NewReader(""))
s.Require().NoError(err)
s.router.ServeHTTP(s.recorder, request)
s.Equal(http.StatusBadRequest, s.recorder.Code)
}
func (s *UpdateFileSystemRouteTestSuite) TestUpdateFileSystemToNonExistingRunnerReturnsNotFound() {
invalidID := "some-invalid-runner-id"
s.runnerManager.On("Get", invalidID).Return(nil, runner.ErrRunnerNotFound)
path, err := s.router.Get(UpdateFileSystemPath).URL(RunnerIDKey, invalidID)
s.Require().NoError(err)
copyRequest := dto.UpdateFileSystemRequest{}
body, err := json.Marshal(copyRequest)
s.Require().NoError(err)
request, err := http.NewRequest(http.MethodPatch, path.String(), bytes.NewReader(body))
s.Require().NoError(err)
s.router.ServeHTTP(s.recorder, request)
s.Equal(http.StatusNotFound, s.recorder.Code)
}
func (s *UpdateFileSystemRouteTestSuite) TestUpdateFileSystemReturnsInternalServerErrorWhenCopyFailed() {
s.runnerMock.
On("UpdateFileSystem", mock.AnythingOfType("*dto.UpdateFileSystemRequest")).
Return(runner.ErrorFileCopyFailed)
copyRequest := dto.UpdateFileSystemRequest{}
body, err := json.Marshal(copyRequest)
s.Require().NoError(err)
request, err := http.NewRequest(http.MethodPatch, s.path, bytes.NewReader(body))
s.Require().NoError(err)
s.router.ServeHTTP(s.recorder, request)
s.Equal(http.StatusInternalServerError, s.recorder.Code)
}
func TestDeleteRunnerRouteTestSuite(t *testing.T) {
suite.Run(t, new(DeleteRunnerRouteTestSuite))
}
type DeleteRunnerRouteTestSuite struct {
RunnerRouteTestSuite
path string
}
func (s *DeleteRunnerRouteTestSuite) SetupTest() {
s.RunnerRouteTestSuite.SetupTest()
deleteURL, err := s.router.Get(DeleteRoute).URL(RunnerIDKey, s.runner.ID())
s.Require().NoError(err)
s.path = deleteURL.String()
}
func (s *DeleteRunnerRouteTestSuite) TestValidRequestReturnsNoContent() {
s.runnerManager.On("Return", s.runner).Return(nil)
recorder := httptest.NewRecorder()
request, err := http.NewRequest(http.MethodDelete, s.path, nil)
s.Require().NoError(err)
s.router.ServeHTTP(recorder, request)
s.Equal(http.StatusNoContent, recorder.Code)
s.Run("runner was returned to runner manager", func() {
s.runnerManager.AssertCalled(s.T(), "Return", s.runner)
})
}
func (s *DeleteRunnerRouteTestSuite) TestReturnInternalServerErrorWhenApiCallToNomadFailed() {
s.runnerManager.On("Return", s.runner).Return(tests.ErrDefault)
recorder := httptest.NewRecorder()
request, err := http.NewRequest(http.MethodDelete, s.path, nil)
s.Require().NoError(err)
s.router.ServeHTTP(recorder, request)
s.Equal(http.StatusInternalServerError, recorder.Code)
}
func (s *DeleteRunnerRouteTestSuite) TestDeleteInvalidRunnerIdReturnsNotFound() {
s.runnerManager.On("Get", mock.AnythingOfType("string")).Return(nil, tests.ErrDefault)
deleteURL, err := s.router.Get(DeleteRoute).URL(RunnerIDKey, "1nv4l1dID")
s.Require().NoError(err)
deletePath := deleteURL.String()
recorder := httptest.NewRecorder()
request, err := http.NewRequest(http.MethodDelete, deletePath, nil)
s.Require().NoError(err)
s.router.ServeHTTP(recorder, request)
s.Equal(http.StatusNotFound, recorder.Code)
}

internal/api/websocket.go (new file, 283 lines)

@ -0,0 +1,283 @@
package api
import (
"context"
"encoding/json"
"errors"
"fmt"
"github.com/gorilla/websocket"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"io"
"net/http"
)
const CodeOceanToRawReaderBufferSize = 1024
var ErrUnknownExecutionID = errors.New("execution id unknown")
type webSocketConnection interface {
WriteMessage(messageType int, data []byte) error
Close() error
NextReader() (messageType int, r io.Reader, err error)
CloseHandler() func(code int, text string) error
SetCloseHandler(handler func(code int, text string) error)
}
type WebSocketReader interface {
io.Reader
startReadInputLoop() context.CancelFunc
}
// codeOceanToRawReader is an io.Reader implementation that provides the content of the WebSocket connection
// to CodeOcean. You have to start the reader by calling startReadInputLoop. After that you can use the Read function.
type codeOceanToRawReader struct {
connection webSocketConnection
// A buffered channel of bytes is used to store data coming from CodeOcean via WebSocket
// and retrieve it when Read(..) is called. Since channels are thread-safe, we use one here
// instead of bytes.Buffer.
buffer chan byte
}
func newCodeOceanToRawReader(connection webSocketConnection) *codeOceanToRawReader {
return &codeOceanToRawReader{
connection: connection,
buffer: make(chan byte, CodeOceanToRawReaderBufferSize),
}
}
// readInputLoop reads from the WebSocket connection and buffers the user's input.
// This is necessary because input must be read for the connection to handle special messages like close and call the
// CloseHandler.
func (cr *codeOceanToRawReader) readInputLoop(ctx context.Context) {
readMessage := make(chan bool)
for {
var messageType int
var reader io.Reader
var err error
go func() {
messageType, reader, err = cr.connection.NextReader()
readMessage <- true
}()
select {
case <-readMessage:
case <-ctx.Done():
return
}
if err != nil {
log.WithError(err).Warn("Error reading client message")
return
}
if messageType != websocket.TextMessage {
log.WithField("messageType", messageType).Warn("Received message of wrong type")
return
}
message, err := io.ReadAll(reader)
if err != nil {
log.WithError(err).Warn("error while reading WebSocket message")
return
}
for _, character := range message {
select {
case cr.buffer <- character:
case <-ctx.Done():
return
}
}
}
}
// startReadInputLoop starts the read input loop asynchronously and returns a context.CancelFunc which can be used
// to cancel the read input loop.
func (cr *codeOceanToRawReader) startReadInputLoop() context.CancelFunc {
ctx, cancel := context.WithCancel(context.Background())
go cr.readInputLoop(ctx)
return cancel
}
// Read implements the io.Reader interface.
// It returns bytes from the buffer.
func (cr *codeOceanToRawReader) Read(p []byte) (int, error) {
if len(p) == 0 {
return 0, nil
}
// Ensure to not return until at least one byte has been read to avoid busy waiting.
p[0] = <-cr.buffer
var n int
for n = 1; n < len(p); n++ {
select {
case p[n] = <-cr.buffer:
default:
return n, nil
}
}
return n, nil
}
// rawToCodeOceanWriter is an io.Writer implementation that, when written to, wraps the written data in the appropriate
// JSON structure and sends it to CodeOcean via WebSocket.
type rawToCodeOceanWriter struct {
proxy *webSocketProxy
outputType dto.WebSocketMessageType
}
// Write implements the io.Writer interface.
// The passed data is forwarded to the WebSocket to CodeOcean.
func (rc *rawToCodeOceanWriter) Write(p []byte) (int, error) {
err := rc.proxy.sendToClient(dto.WebSocketMessage{Type: rc.outputType, Data: string(p)})
return len(p), err
}
// webSocketProxy is an encapsulation of logic for forwarding between Runners and CodeOcean.
type webSocketProxy struct {
userExit chan bool
connection webSocketConnection
Stdin WebSocketReader
Stdout io.Writer
Stderr io.Writer
}
// upgradeConnection upgrades a connection to a WebSocket and returns the upgraded webSocketConnection.
func upgradeConnection(writer http.ResponseWriter, request *http.Request) (webSocketConnection, error) {
connUpgrader := websocket.Upgrader{}
connection, err := connUpgrader.Upgrade(writer, request, nil)
if err != nil {
log.WithError(err).Warn("Connection upgrade failed")
return nil, fmt.Errorf("error upgrading the connection: %w", err)
}
return connection, nil
}
// newWebSocketProxy returns an initiated and started webSocketProxy.
// As this proxy is already started, a start message is sent to the client.
func newWebSocketProxy(connection webSocketConnection) (*webSocketProxy, error) {
stdin := newCodeOceanToRawReader(connection)
proxy := &webSocketProxy{
connection: connection,
Stdin: stdin,
userExit: make(chan bool),
}
proxy.Stdout = &rawToCodeOceanWriter{proxy: proxy, outputType: dto.WebSocketOutputStdout}
proxy.Stderr = &rawToCodeOceanWriter{proxy: proxy, outputType: dto.WebSocketOutputStderr}
err := proxy.sendToClient(dto.WebSocketMessage{Type: dto.WebSocketMetaStart})
if err != nil {
return nil, err
}
closeHandler := connection.CloseHandler()
connection.SetCloseHandler(func(code int, text string) error {
//nolint:errcheck // The default close handler always returns nil, so the error can be safely ignored.
_ = closeHandler(code, text)
close(proxy.userExit)
return nil
})
return proxy, nil
}
// waitForExit waits for an exit of either the runner (when the command terminates) or the client closing the WebSocket
// and handles WebSocket exit messages.
func (wp *webSocketProxy) waitForExit(exit <-chan runner.ExitInfo, cancelExecution context.CancelFunc) {
defer wp.close()
cancelInputLoop := wp.Stdin.startReadInputLoop()
var exitInfo runner.ExitInfo
select {
case exitInfo = <-exit:
cancelInputLoop()
log.Info("Execution returned")
case <-wp.userExit:
cancelInputLoop()
cancelExecution()
log.Info("Client closed the connection")
return
}
if errors.Is(exitInfo.Err, context.DeadlineExceeded) || errors.Is(exitInfo.Err, runner.ErrorRunnerInactivityTimeout) {
err := wp.sendToClient(dto.WebSocketMessage{Type: dto.WebSocketMetaTimeout})
if err != nil {
log.WithError(err).Warn("Failed to send timeout message to client")
}
return
} else if exitInfo.Err != nil {
errorMessage := "Error executing the request"
log.WithError(exitInfo.Err).Warn(errorMessage)
err := wp.sendToClient(dto.WebSocketMessage{Type: dto.WebSocketOutputError, Data: errorMessage})
if err != nil {
log.WithError(err).Warn("Failed to send output error message to client")
}
return
}
log.WithField("exit_code", exitInfo.Code).Debug()
err := wp.sendToClient(dto.WebSocketMessage{
Type: dto.WebSocketExit,
ExitCode: exitInfo.Code,
})
if err != nil {
return
}
}
func (wp *webSocketProxy) sendToClient(message dto.WebSocketMessage) error {
encodedMessage, err := json.Marshal(message)
if err != nil {
log.WithField("message", message).WithError(err).Warn("Marshal error")
wp.closeWithError("Error creating message")
return fmt.Errorf("error marshaling WebSocket message: %w", err)
}
err = wp.connection.WriteMessage(websocket.TextMessage, encodedMessage)
if err != nil {
errorMessage := "Error writing the exit message"
log.WithError(err).Warn(errorMessage)
wp.closeWithError(errorMessage)
return fmt.Errorf("error writing WebSocket message: %w", err)
}
return nil
}
func (wp *webSocketProxy) closeWithError(message string) {
err := wp.connection.WriteMessage(websocket.CloseMessage,
websocket.FormatCloseMessage(websocket.CloseInternalServerErr, message))
if err != nil {
log.WithError(err).Warn("Error during websocket close")
}
}
func (wp *webSocketProxy) close() {
err := wp.connection.WriteMessage(websocket.CloseMessage,
websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""))
_ = wp.connection.Close()
if err != nil {
log.WithError(err).Warn("Error during websocket close")
}
}
// connectToRunner is the endpoint for websocket connections.
func (r *RunnerController) connectToRunner(writer http.ResponseWriter, request *http.Request) {
targetRunner, _ := runner.FromContext(request.Context())
executionID := runner.ExecutionID(request.URL.Query().Get(ExecutionIDKey))
executionRequest, ok := targetRunner.Pop(executionID)
if !ok {
writeNotFound(writer, ErrUnknownExecutionID)
return
}
connection, err := upgradeConnection(writer, request)
if err != nil {
writeInternalServerError(writer, err, dto.ErrorUnknown)
return
}
proxy, err := newWebSocketProxy(connection)
if err != nil {
return
}
log.WithField("runnerId", targetRunner.ID()).WithField("executionID", executionID).Info("Running execution")
exit, cancel := targetRunner.ExecuteInteractively(executionRequest, proxy.Stdin, proxy.Stdout, proxy.Stderr)
proxy.waitForExit(exit, cancel)
}
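For completeness, a sketch of the client side of this protocol (not part of this commit; it reads the JSON-encoded dto.WebSocketMessage frames written by the proxy above, and the WebSocket URL is expected to come from the execute route's dto.ExecutionResponse):

// sketchWebSocketClient prints stdout output and stops at the exit message.
// Stderr and error messages are ignored to keep the sketch short.
func sketchWebSocketClient(webSocketURL string) error {
	connection, _, err := websocket.DefaultDialer.Dial(webSocketURL, nil)
	if err != nil {
		return err
	}
	defer func() { _ = connection.Close() }()
	for {
		_, data, err := connection.ReadMessage()
		if err != nil {
			return err
		}
		var message dto.WebSocketMessage
		if err := json.Unmarshal(data, &message); err != nil {
			return err
		}
		switch message.Type {
		case dto.WebSocketOutputStdout:
			fmt.Print(message.Data)
		case dto.WebSocketExit:
			fmt.Println("exit code:", message.ExitCode)
			return nil
		}
	}
}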


@ -0,0 +1,93 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package api
import (
io "io"
mock "github.com/stretchr/testify/mock"
)
// webSocketConnectionMock is an autogenerated mock type for the webSocketConnection type
type webSocketConnectionMock struct {
mock.Mock
}
// Close provides a mock function with given fields:
func (_m *webSocketConnectionMock) Close() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// CloseHandler provides a mock function with given fields:
func (_m *webSocketConnectionMock) CloseHandler() func(int, string) error {
ret := _m.Called()
var r0 func(int, string) error
if rf, ok := ret.Get(0).(func() func(int, string) error); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(func(int, string) error)
}
}
return r0
}
// NextReader provides a mock function with given fields:
func (_m *webSocketConnectionMock) NextReader() (int, io.Reader, error) {
ret := _m.Called()
var r0 int
if rf, ok := ret.Get(0).(func() int); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(int)
}
var r1 io.Reader
if rf, ok := ret.Get(1).(func() io.Reader); ok {
r1 = rf()
} else {
if ret.Get(1) != nil {
r1 = ret.Get(1).(io.Reader)
}
}
var r2 error
if rf, ok := ret.Get(2).(func() error); ok {
r2 = rf()
} else {
r2 = ret.Error(2)
}
return r0, r1, r2
}
// SetCloseHandler provides a mock function with given fields: h
func (_m *webSocketConnectionMock) SetCloseHandler(h func(int, string) error) {
_m.Called(h)
}
// WriteMessage provides a mock function with given fields: messageType, data
func (_m *webSocketConnectionMock) WriteMessage(messageType int, data []byte) error {
ret := _m.Called(messageType, data)
var r0 error
if rf, ok := ret.Get(0).(func(int, []byte) error); ok {
r0 = rf(messageType, data)
} else {
r0 = ret.Error(0)
}
return r0
}

internal/api/websocket_test.go (new file, 490 lines)

@ -0,0 +1,490 @@
package api
import (
"bufio"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"github.com/gorilla/mux"
"github.com/gorilla/websocket"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests/helpers"
"io"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"time"
)
func TestWebSocketTestSuite(t *testing.T) {
suite.Run(t, new(WebSocketTestSuite))
}
type WebSocketTestSuite struct {
suite.Suite
router *mux.Router
executionID runner.ExecutionID
runner runner.Runner
apiMock *nomad.ExecutorAPIMock
server *httptest.Server
}
func (s *WebSocketTestSuite) SetupTest() {
runnerID := "runner-id"
s.runner, s.apiMock = newNomadAllocationWithMockedAPIClient(runnerID)
// default execution
s.executionID = "execution-id"
s.runner.Add(s.executionID, &executionRequestHead)
mockAPIExecuteHead(s.apiMock)
runnerManager := &runner.ManagerMock{}
runnerManager.On("Get", s.runner.ID()).Return(s.runner, nil)
s.router = NewRouter(runnerManager, nil)
s.server = httptest.NewServer(s.router)
}
func (s *WebSocketTestSuite) TearDownTest() {
s.server.Close()
}
func (s *WebSocketTestSuite) TestWebsocketConnectionCanBeEstablished() {
wsURL, err := s.webSocketURL("ws", s.runner.ID(), s.executionID)
s.Require().NoError(err)
_, _, err = websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
}
func (s *WebSocketTestSuite) TestWebsocketReturns404IfExecutionDoesNotExist() {
wsURL, err := s.webSocketURL("ws", s.runner.ID(), "invalid-execution-id")
s.Require().NoError(err)
_, response, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().ErrorIs(err, websocket.ErrBadHandshake)
s.Equal(http.StatusNotFound, response.StatusCode)
}
func (s *WebSocketTestSuite) TestWebsocketReturns400IfRequestedViaHttp() {
wsURL, err := s.webSocketURL("http", s.runner.ID(), s.executionID)
s.Require().NoError(err)
response, err := http.Get(wsURL.String())
s.Require().NoError(err)
// This functionality is implemented by the WebSocket library.
s.Equal(http.StatusBadRequest, response.StatusCode)
}
func (s *WebSocketTestSuite) TestWebsocketConnection() {
wsURL, err := s.webSocketURL("ws", s.runner.ID(), s.executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
err = connection.SetReadDeadline(time.Now().Add(5 * time.Second))
s.Require().NoError(err)
s.Run("Receives start message", func() {
message, err := helpers.ReceiveNextWebSocketMessage(connection)
s.Require().NoError(err)
s.Equal(dto.WebSocketMetaStart, message.Type)
})
s.Run("Executes the request in the runner", func() {
<-time.After(100 * time.Millisecond)
s.apiMock.AssertCalled(s.T(), "ExecuteCommand",
mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything)
})
s.Run("Can send input", func() {
err = connection.WriteMessage(websocket.TextMessage, []byte("Hello World\n"))
s.Require().NoError(err)
})
messages, err := helpers.ReceiveAllWebSocketMessages(connection)
s.Require().Error(err)
s.True(websocket.IsCloseError(err, websocket.CloseNormalClosure))
s.Run("Receives output message", func() {
stdout, _, _ := helpers.WebSocketOutputMessages(messages)
s.Equal("Hello World", stdout)
})
s.Run("Receives exit message", func() {
controlMessages := helpers.WebSocketControlMessages(messages)
s.Require().Equal(1, len(controlMessages))
s.Equal(dto.WebSocketExit, controlMessages[0].Type)
})
}
func (s *WebSocketTestSuite) TestCancelWebSocketConnection() {
executionID := runner.ExecutionID("sleeping-execution")
s.runner.Add(executionID, &executionRequestSleep)
canceled := mockAPIExecuteSleep(s.apiMock)
wsURL, err := webSocketURL("ws", s.server, s.router, s.runner.ID(), executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
message, err := helpers.ReceiveNextWebSocketMessage(connection)
s.Require().NoError(err)
s.Equal(dto.WebSocketMetaStart, message.Type)
select {
case <-canceled:
s.Fail("ExecuteInteractively canceled unexpected")
default:
}
err = connection.WriteControl(websocket.CloseMessage,
websocket.FormatCloseMessage(websocket.CloseNormalClosure, ""), time.Now().Add(time.Second))
s.Require().NoError(err)
select {
case <-canceled:
case <-time.After(time.Second):
s.Fail("ExecuteInteractively not canceled")
}
}
func (s *WebSocketTestSuite) TestWebSocketConnectionTimeout() {
executionID := runner.ExecutionID("time-out-execution")
limitExecution := executionRequestSleep
limitExecution.TimeLimit = 2
s.runner.Add(executionID, &limitExecution)
canceled := mockAPIExecuteSleep(s.apiMock)
wsURL, err := webSocketURL("ws", s.server, s.router, s.runner.ID(), executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
message, err := helpers.ReceiveNextWebSocketMessage(connection)
s.Require().NoError(err)
s.Equal(dto.WebSocketMetaStart, message.Type)
select {
case <-canceled:
s.Fail("ExecuteInteractively canceled unexpected")
case <-time.After(time.Duration(limitExecution.TimeLimit-1) * time.Second):
<-time.After(time.Second)
}
select {
case <-canceled:
case <-time.After(time.Second):
s.Fail("ExecuteInteractively not canceled")
}
message, err = helpers.ReceiveNextWebSocketMessage(connection)
s.Require().NoError(err)
s.Equal(dto.WebSocketMetaTimeout, message.Type)
}
func (s *WebSocketTestSuite) TestWebsocketStdoutAndStderr() {
executionID := runner.ExecutionID("ls-execution")
s.runner.Add(executionID, &executionRequestLs)
mockAPIExecuteLs(s.apiMock)
wsURL, err := webSocketURL("ws", s.server, s.router, s.runner.ID(), executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
messages, err := helpers.ReceiveAllWebSocketMessages(connection)
s.Require().Error(err)
s.True(websocket.IsCloseError(err, websocket.CloseNormalClosure))
stdout, stderr, _ := helpers.WebSocketOutputMessages(messages)
s.Contains(stdout, "existing-file")
s.Contains(stderr, "non-existing-file")
}
func (s *WebSocketTestSuite) TestWebsocketError() {
executionID := runner.ExecutionID("error-execution")
s.runner.Add(executionID, &executionRequestError)
mockAPIExecuteError(s.apiMock)
wsURL, err := webSocketURL("ws", s.server, s.router, s.runner.ID(), executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
messages, err := helpers.ReceiveAllWebSocketMessages(connection)
s.Require().Error(err)
s.True(websocket.IsCloseError(err, websocket.CloseNormalClosure))
_, _, errMessages := helpers.WebSocketOutputMessages(messages)
s.Equal(1, len(errMessages))
s.Equal("Error executing the request", errMessages[0])
}
func (s *WebSocketTestSuite) TestWebsocketNonZeroExit() {
executionID := runner.ExecutionID("exit-execution")
s.runner.Add(executionID, &executionRequestExitNonZero)
mockAPIExecuteExitNonZero(s.apiMock)
wsURL, err := webSocketURL("ws", s.server, s.router, s.runner.ID(), executionID)
s.Require().NoError(err)
connection, _, err := websocket.DefaultDialer.Dial(wsURL.String(), nil)
s.Require().NoError(err)
messages, err := helpers.ReceiveAllWebSocketMessages(connection)
s.Require().Error(err)
s.True(websocket.IsCloseError(err, websocket.CloseNormalClosure))
controlMessages := helpers.WebSocketControlMessages(messages)
s.Equal(2, len(controlMessages))
s.Equal(&dto.WebSocketMessage{Type: dto.WebSocketExit, ExitCode: 42}, controlMessages[1])
}
func TestWebsocketTLS(t *testing.T) {
runnerID := "runner-id"
r, apiMock := newNomadAllocationWithMockedAPIClient(runnerID)
executionID := runner.ExecutionID("execution-id")
r.Add(executionID, &executionRequestLs)
mockAPIExecuteLs(apiMock)
runnerManager := &runner.ManagerMock{}
runnerManager.On("Get", r.ID()).Return(r, nil)
router := NewRouter(runnerManager, nil)
server, err := helpers.StartTLSServer(t, router)
require.NoError(t, err)
defer server.Close()
wsURL, err := webSocketURL("wss", server, router, runnerID, executionID)
require.NoError(t, err)
config := &tls.Config{RootCAs: nil, InsecureSkipVerify: true} //nolint:gosec // test needs self-signed cert
d := websocket.Dialer{TLSClientConfig: config}
connection, _, err := d.Dial(wsURL.String(), nil)
require.NoError(t, err)
message, err := helpers.ReceiveNextWebSocketMessage(connection)
require.NoError(t, err)
assert.Equal(t, dto.WebSocketMetaStart, message.Type)
_, err = helpers.ReceiveAllWebSocketMessages(connection)
require.Error(t, err)
assert.True(t, websocket.IsCloseError(err, websocket.CloseNormalClosure))
}
func TestRawToCodeOceanWriter(t *testing.T) {
testMessage := "test"
var message []byte
connectionMock := &webSocketConnectionMock{}
connectionMock.On("WriteMessage", mock.AnythingOfType("int"), mock.AnythingOfType("[]uint8")).
Run(func(args mock.Arguments) {
var ok bool
message, ok = args.Get(1).([]byte)
require.True(t, ok)
}).
Return(nil)
connectionMock.On("CloseHandler").Return(nil)
connectionMock.On("SetCloseHandler", mock.Anything).Return()
proxy, err := newWebSocketProxy(connectionMock)
require.NoError(t, err)
writer := &rawToCodeOceanWriter{
proxy: proxy,
outputType: dto.WebSocketOutputStdout,
}
_, err = writer.Write([]byte(testMessage))
require.NoError(t, err)
expected, err := json.Marshal(struct {
Type string `json:"type"`
Data string `json:"data"`
}{string(dto.WebSocketOutputStdout), testMessage})
require.NoError(t, err)
assert.Equal(t, expected, message)
}
func TestCodeOceanToRawReaderReturnsOnlyAfterOneByteWasRead(t *testing.T) {
reader := newCodeOceanToRawReader(nil)
read := make(chan bool)
go func() {
//nolint:makezero // we can't make zero initial length here as the reader otherwise doesn't block
p := make([]byte, 10)
_, err := reader.Read(p)
require.NoError(t, err)
read <- true
}()
t.Run("Does not return immediately when there is no data", func(t *testing.T) {
assert.False(t, tests.ChannelReceivesSomething(read, tests.ShortTimeout))
})
t.Run("Returns when there is data available", func(t *testing.T) {
reader.buffer <- byte(42)
assert.True(t, tests.ChannelReceivesSomething(read, tests.ShortTimeout))
})
}
func TestCodeOceanToRawReaderReturnsOnlyAfterOneByteWasReadFromConnection(t *testing.T) {
messages := make(chan io.Reader)
connection := &webSocketConnectionMock{}
connection.On("WriteMessage", mock.AnythingOfType("int"), mock.AnythingOfType("[]uint8")).Return(nil)
connection.On("CloseHandler").Return(nil)
connection.On("SetCloseHandler", mock.Anything).Return()
call := connection.On("NextReader")
call.Run(func(_ mock.Arguments) {
call.Return(websocket.TextMessage, <-messages, nil)
})
reader := newCodeOceanToRawReader(connection)
cancel := reader.startReadInputLoop()
defer cancel()
read := make(chan bool)
//nolint:makezero // this is required here to make the Read call blocking
message := make([]byte, 10)
go func() {
_, err := reader.Read(message)
require.NoError(t, err)
read <- true
}()
t.Run("Does not return immediately when there is no data", func(t *testing.T) {
assert.False(t, tests.ChannelReceivesSomething(read, tests.ShortTimeout))
})
t.Run("Returns when there is data available", func(t *testing.T) {
messages <- strings.NewReader("Hello")
assert.True(t, tests.ChannelReceivesSomething(read, tests.ShortTimeout))
})
}
// --- Test suite specific test helpers ---
func newNomadAllocationWithMockedAPIClient(runnerID string) (r runner.Runner, executorAPIMock *nomad.ExecutorAPIMock) {
executorAPIMock = &nomad.ExecutorAPIMock{}
r = runner.NewNomadJob(runnerID, nil, executorAPIMock, nil)
return
}
func webSocketURL(scheme string, server *httptest.Server, router *mux.Router,
runnerID string, executionID runner.ExecutionID,
) (*url.URL, error) {
websocketURL, err := url.Parse(server.URL)
if err != nil {
return nil, err
}
path, err := router.Get(WebsocketPath).URL(RunnerIDKey, runnerID)
if err != nil {
return nil, err
}
websocketURL.Scheme = scheme
websocketURL.Path = path.Path
websocketURL.RawQuery = fmt.Sprintf("executionID=%s", executionID)
return websocketURL, nil
}
func (s *WebSocketTestSuite) webSocketURL(scheme, runnerID string, executionID runner.ExecutionID) (*url.URL, error) {
return webSocketURL(scheme, s.server, s.router, runnerID, executionID)
}
var executionRequestLs = dto.ExecutionRequest{Command: "ls"}
// mockAPIExecuteLs mocks the ExecuteCommand of an ExecutorApi to act as if
// 'ls existing-file non-existing-file' was executed.
func mockAPIExecuteLs(api *nomad.ExecutorAPIMock) {
mockAPIExecute(api, &executionRequestLs,
func(_ string, _ context.Context, _ []string, _ bool, _ io.Reader, stdout, stderr io.Writer) (int, error) {
_, _ = stdout.Write([]byte("existing-file\n"))
_, _ = stderr.Write([]byte("ls: cannot access 'non-existing-file': No such file or directory\n"))
return 0, nil
})
}
var executionRequestHead = dto.ExecutionRequest{Command: "head -n 1"}
// mockAPIExecuteHead mocks the ExecuteCommand of an ExecutorApi to act as if 'head -n 1' was executed.
func mockAPIExecuteHead(api *nomad.ExecutorAPIMock) {
mockAPIExecute(api, &executionRequestHead,
func(_ string, _ context.Context, _ []string, _ bool,
stdin io.Reader, stdout io.Writer, stderr io.Writer,
) (int, error) {
scanner := bufio.NewScanner(stdin)
for !scanner.Scan() {
scanner = bufio.NewScanner(stdin)
}
_, _ = stdout.Write(scanner.Bytes())
return 0, nil
})
}
var executionRequestSleep = dto.ExecutionRequest{Command: "sleep infinity"}
// mockAPIExecuteSleep mocks the ExecuteCommand method of an ExecutorAPI to sleep until the execution is canceled.
func mockAPIExecuteSleep(api *nomad.ExecutorAPIMock) <-chan bool {
canceled := make(chan bool, 1)
mockAPIExecute(api, &executionRequestSleep,
func(_ string, ctx context.Context, _ []string, _ bool,
stdin io.Reader, stdout io.Writer, stderr io.Writer,
) (int, error) {
<-ctx.Done()
close(canceled)
return 0, ctx.Err()
})
return canceled
}
var executionRequestError = dto.ExecutionRequest{Command: "error"}
// mockAPIExecuteError mocks the ExecuteCommand method of an ExecutorAPI to return an error.
func mockAPIExecuteError(api *nomad.ExecutorAPIMock) {
mockAPIExecute(api, &executionRequestError,
func(_ string, _ context.Context, _ []string, _ bool, _ io.Reader, _, _ io.Writer) (int, error) {
return 0, tests.ErrDefault
})
}
var executionRequestExitNonZero = dto.ExecutionRequest{Command: "exit 42"}
// mockAPIExecuteExitNonZero mocks the ExecuteCommand method of an ExecutorAPI to exit with exit status 42.
func mockAPIExecuteExitNonZero(api *nomad.ExecutorAPIMock) {
mockAPIExecute(api, &executionRequestExitNonZero,
func(_ string, _ context.Context, _ []string, _ bool, _ io.Reader, _, _ io.Writer) (int, error) {
return 42, nil
})
}
// mockAPIExecute mocks the ExecuteCommand method of an ExecutorAPI to call the given function run when the command
// corresponding to the given ExecutionRequest is executed.
func mockAPIExecute(api *nomad.ExecutorAPIMock, request *dto.ExecutionRequest,
run func(runnerId string, ctx context.Context, command []string, tty bool,
stdin io.Reader, stdout, stderr io.Writer) (int, error),
) {
call := api.On("ExecuteCommand",
mock.AnythingOfType("string"),
mock.Anything,
request.FullCommand(),
mock.AnythingOfType("bool"),
mock.Anything,
mock.Anything,
mock.Anything)
call.Run(func(args mock.Arguments) {
exit, err := run(args.Get(0).(string),
args.Get(1).(context.Context),
args.Get(2).([]string),
args.Get(3).(bool),
args.Get(4).(io.Reader),
args.Get(5).(io.Writer),
args.Get(6).(io.Writer))
call.ReturnArguments = mock.Arguments{exit, err}
})
}
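For illustration, a minimal sketch (not part of this commit) of how the ls mock above can be exercised directly. The test name and the runner ID "some-runner" are invented; everything else relies only on mockAPIExecuteLs, executionRequestLs, and the ExecutorAPIMock introduced in this commit.

package api

import (
	"bytes"
	"context"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
)

// TestMockAPIExecuteLsSketch is a hypothetical test showing how the ls mock fakes command output.
func TestMockAPIExecuteLsSketch(t *testing.T) {
	apiMock := &nomad.ExecutorAPIMock{}
	mockAPIExecuteLs(apiMock)

	var stdout, stderr bytes.Buffer
	// The mock matches on the exact command slice of executionRequestLs and writes the canned output.
	exit, err := apiMock.ExecuteCommand("some-runner", context.Background(),
		executionRequestLs.FullCommand(), false, strings.NewReader(""), &stdout, &stderr)

	require.NoError(t, err)
	assert.Equal(t, 0, exit)
	assert.Equal(t, "existing-file\n", stdout.String())
	assert.Contains(t, stderr.String(), "non-existing-file")
}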

internal/config/config.go Normal file

@ -0,0 +1,195 @@
package config
import (
"crypto/tls"
"errors"
"flag"
"fmt"
"github.com/sirupsen/logrus"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/logging"
"gopkg.in/yaml.v3"
"net/url"
"os"
"reflect"
"strconv"
"strings"
)
// Config contains the default configuration of Poseidon.
var (
Config = &configuration{
Server: server{
Address: "127.0.0.1",
Port: 7200,
Token: "",
TLS: false,
CertFile: "",
KeyFile: "",
InteractiveStderr: true,
},
Nomad: nomad{
Address: "127.0.0.1",
Port: 4646,
Token: "",
TLS: false,
Namespace: "default",
},
Logger: logger{
Level: "INFO",
},
}
configurationFilePath = "./configuration.yaml"
configurationInitialized = false
log = logging.GetLogger("config")
TLSConfig = &tls.Config{
MinVersion: tls.VersionTLS13,
CurvePreferences: []tls.CurveID{tls.CurveP521, tls.CurveP384, tls.CurveP256},
PreferServerCipherSuites: true,
}
ErrConfigInitialized = errors.New("configuration is already initialized")
)
// server configures the Poseidon webserver.
type server struct {
Address string
Port int
Token string
TLS bool
CertFile string
KeyFile string
InteractiveStderr bool
}
// nomad configures the used Nomad cluster.
type nomad struct {
Address string
Port int
Token string
TLS bool
Namespace string
}
// logger configures the used logger.
type logger struct {
Level string
}
// configuration contains the complete configuration of Poseidon.
type configuration struct {
Server server
Nomad nomad
Logger logger
}
// InitConfig merges configuration options from environment variables and
// a configuration file into the default configuration. Calls of InitConfig
// after the first call have no effect and return an error. InitConfig
// should be called directly after starting the program.
func InitConfig() error {
if configurationInitialized {
return ErrConfigInitialized
}
configurationInitialized = true
content := readConfigFile()
Config.mergeYaml(content)
Config.mergeEnvironmentVariables()
return nil
}
// NomadAPIURL returns the URL for the configured Nomad cluster.
func (c *configuration) NomadAPIURL() *url.URL {
return parseURL(Config.Nomad.Address, Config.Nomad.Port, Config.Nomad.TLS)
}
// PoseidonAPIURL returns the URL of the Poseidon webserver.
func (c *configuration) PoseidonAPIURL() *url.URL {
return parseURL(Config.Server.Address, Config.Server.Port, false)
}
func parseURL(address string, port int, tlsEnabled bool) *url.URL {
scheme := "http"
if tlsEnabled {
scheme = "https"
}
return &url.URL{
Scheme: scheme,
Host: fmt.Sprintf("%s:%d", address, port),
}
}
func readConfigFile() []byte {
parseFlags()
data, err := os.ReadFile(configurationFilePath)
if err != nil {
log.WithError(err).Info("Using default configuration...")
return nil
}
return data
}
func parseFlags() {
if flag.Lookup("config") == nil {
flag.StringVar(&configurationFilePath, "config", configurationFilePath, "path of the yaml config file")
}
flag.Parse()
}
func (c *configuration) mergeYaml(content []byte) {
if err := yaml.Unmarshal(content, c); err != nil {
log.WithError(err).Fatal("Could not parse configuration file")
}
}
func (c *configuration) mergeEnvironmentVariables() {
readFromEnvironment("POSEIDON", reflect.ValueOf(c).Elem())
}
func readFromEnvironment(prefix string, value reflect.Value) {
logEntry := log.WithField("prefix", prefix)
// if value was not derived from a pointer, it is not possible to alter its contents
if !value.CanSet() {
logEntry.Warn("Cannot overwrite struct field that can not be set")
return
}
if value.Kind() != reflect.Struct {
loadValue(prefix, value, logEntry)
} else {
for i := 0; i < value.NumField(); i++ {
fieldName := value.Type().Field(i).Name
newPrefix := fmt.Sprintf("%s_%s", prefix, strings.ToUpper(fieldName))
readFromEnvironment(newPrefix, value.Field(i))
}
}
}
func loadValue(prefix string, value reflect.Value, logEntry *logrus.Entry) {
content, ok := os.LookupEnv(prefix)
if !ok {
return
}
logEntry = logEntry.WithField("content", content)
switch value.Kind() {
case reflect.String:
value.SetString(content)
case reflect.Int:
integer, err := strconv.Atoi(content)
if err != nil {
logEntry.Warn("Could not parse environment variable as integer")
return
}
value.SetInt(int64(integer))
case reflect.Bool:
boolean, err := strconv.ParseBool(content)
if err != nil {
logEntry.Warn("Could not parse environment variable as boolean")
return
}
value.SetBool(boolean)
default:
// ignore this field
logEntry.WithField("type", value.Type().Name()).
Warn("Setting configuration option via environment variables is not supported")
}
}
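For illustration, a hedged sketch (not part of this commit) of the precedence InitConfig implements: environment variables are merged after the YAML file, so they win over both the file and the built-in defaults. The standalone main function is invented for the example.

package main

import (
	"fmt"
	"os"

	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
)

func main() {
	// Environment variables follow the POSEIDON_<SECTION>_<FIELD> naming scheme and are applied last.
	_ = os.Setenv("POSEIDON_SERVER_PORT", "4000")
	if err := config.InitConfig(); err != nil {
		panic(err)
	}
	// Prints 4000 even if ./configuration.yaml sets a different port, because the
	// environment variable is merged after the YAML file.
	fmt.Println(config.Config.Server.Port)
}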


@ -0,0 +1,208 @@
package config
import (
"fmt"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"os"
"path/filepath"
"reflect"
"testing"
)
var (
getServerPort = func(c *configuration) interface{} { return c.Server.Port }
getNomadToken = func(c *configuration) interface{} { return c.Nomad.Token }
getNomadTLS = func(c *configuration) interface{} { return c.Nomad.TLS }
)
func newTestConfiguration() *configuration {
return &configuration{
Server: server{
Address: "127.0.0.1",
Port: 3000,
},
Nomad: nomad{
Address: "127.0.0.2",
Port: 4646,
Token: "SECRET",
TLS: false,
},
Logger: logger{
Level: "INFO",
},
}
}
func (c *configuration) getReflectValue() reflect.Value {
return reflect.ValueOf(c).Elem()
}
// writeConfigurationFile creates a file on disk and returns the path to it.
func writeConfigurationFile(t *testing.T, name string, content []byte) string {
t.Helper()
directory := t.TempDir()
filePath := filepath.Join(directory, name)
file, err := os.Create(filePath)
if err != nil {
t.Fatal("Could not create config file")
}
defer file.Close()
_, err = file.Write(content)
require.NoError(t, err)
return filePath
}
func TestCallingInitConfigTwiceReturnsError(t *testing.T) {
configurationInitialized = false
err := InitConfig()
assert.NoError(t, err)
err = InitConfig()
assert.Error(t, err)
}
func TestCallingInitConfigTwiceDoesNotChangeConfig(t *testing.T) {
configurationInitialized = false
err := InitConfig()
require.NoError(t, err)
Config = newTestConfiguration()
filePath := writeConfigurationFile(t, "test.yaml", []byte("server:\n port: 5000\n"))
oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = append(os.Args, "-config", filePath)
err = InitConfig()
require.Error(t, err)
assert.Equal(t, 3000, Config.Server.Port)
}
func TestReadEnvironmentVariables(t *testing.T) {
var environmentTests = []struct {
variableSuffix string
valueToSet string
expectedValue interface{}
getTargetField func(*configuration) interface{}
}{
{"SERVER_PORT", "4000", 4000, getServerPort},
{"SERVER_PORT", "hello", 3000, getServerPort},
{"NOMAD_TOKEN", "ACCESS", "ACCESS", getNomadToken},
{"NOMAD_TLS", "true", true, getNomadTLS},
{"NOMAD_TLS", "hello", false, getNomadTLS},
}
prefix := "POSEIDON_TEST"
for _, testCase := range environmentTests {
config := newTestConfiguration()
environmentVariable := fmt.Sprintf("%s_%s", prefix, testCase.variableSuffix)
_ = os.Setenv(environmentVariable, testCase.valueToSet)
readFromEnvironment(prefix, config.getReflectValue())
_ = os.Unsetenv(environmentVariable)
assert.Equal(t, testCase.expectedValue, testCase.getTargetField(config))
}
}
func TestReadEnvironmentIgnoresNonPointerValue(t *testing.T) {
config := newTestConfiguration()
_ = os.Setenv("POSEIDON_TEST_SERVER_PORT", "4000")
readFromEnvironment("POSEIDON_TEST", reflect.ValueOf(config))
_ = os.Unsetenv("POSEIDON_TEST_SERVER_PORT")
assert.Equal(t, 3000, config.Server.Port)
}
func TestReadEnvironmentIgnoresNotSupportedType(t *testing.T) {
config := &struct{ Timeout float64 }{1.0}
_ = os.Setenv("POSEIDON_TEST_TIMEOUT", "2.5")
readFromEnvironment("POSEIDON_TEST", reflect.ValueOf(config).Elem())
_ = os.Unsetenv("POSEIDON_TEST_TIMEOUT")
assert.Equal(t, 1.0, config.Timeout)
}
func TestUnsetEnvironmentVariableDoesNotChangeConfig(t *testing.T) {
config := newTestConfiguration()
readFromEnvironment("POSEIDON_TEST", config.getReflectValue())
assert.Equal(t, "INFO", config.Logger.Level)
}
func TestReadYamlConfigFile(t *testing.T) {
var yamlTests = []struct {
content []byte
expectedValue interface{}
getTargetField func(*configuration) interface{}
}{
{[]byte("server:\n port: 5000\n"), 5000, getServerPort},
{[]byte("nomad:\n token: ACCESS\n"), "ACCESS", getNomadToken},
{[]byte("nomad:\n tls: true\n"), true, getNomadTLS},
{[]byte(""), false, getNomadTLS},
{[]byte("nomad:\n token:\n"), "SECRET", getNomadToken},
}
for _, testCase := range yamlTests {
config := newTestConfiguration()
config.mergeYaml(testCase.content)
assert.Equal(t, testCase.expectedValue, testCase.getTargetField(config))
}
}
func TestInvalidYamlExitsProgram(t *testing.T) {
logger, hook := test.NewNullLogger()
// this function is used when calling log.Fatal() and
// prevents the program from exiting during this test
logger.ExitFunc = func(code int) {}
log = logger.WithField("package", "config_test")
config := newTestConfiguration()
config.mergeYaml([]byte("logger: level: DEBUG"))
assert.Equal(t, 1, len(hook.Entries))
assert.Equal(t, logrus.FatalLevel, hook.LastEntry().Level)
}
func TestReadConfigFileOverwritesConfig(t *testing.T) {
Config = newTestConfiguration()
filePath := writeConfigurationFile(t, "test.yaml", []byte("server:\n port: 5000\n"))
oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = append(os.Args, "-config", filePath)
configurationInitialized = false
err := InitConfig()
require.NoError(t, err)
assert.Equal(t, 5000, Config.Server.Port)
}
func TestReadNonExistingConfigFileDoesNotOverwriteConfig(t *testing.T) {
Config = newTestConfiguration()
oldArgs := os.Args
defer func() { os.Args = oldArgs }()
os.Args = append(os.Args, "-config", "file_does_not_exist.yaml")
configurationInitialized = false
err := InitConfig()
require.NoError(t, err)
assert.Equal(t, 3000, Config.Server.Port)
}
func TestURLParsing(t *testing.T) {
var urlTests = []struct {
address string
port int
tls bool
expectedScheme string
expectedHost string
}{
{"localhost", 3000, false, "http", "localhost:3000"},
{"127.0.0.1", 4000, true, "https", "127.0.0.1:4000"},
}
for _, testCase := range urlTests {
url := parseURL(testCase.address, testCase.port, testCase.tls)
assert.Equal(t, testCase.expectedScheme, url.Scheme)
assert.Equal(t, testCase.expectedHost, url.Host)
}
}
func TestNomadAPIURL(t *testing.T) {
config := newTestConfiguration()
assert.Equal(t, "http", config.NomadAPIURL().Scheme)
assert.Equal(t, "127.0.0.2:4646", config.NomadAPIURL().Host)
}
func TestPoseidonAPIURL(t *testing.T) {
config := newTestConfiguration()
assert.Equal(t, "http", config.PoseidonAPIURL().Scheme)
assert.Equal(t, "127.0.0.1:3000", config.PoseidonAPIURL().Host)
}


@ -0,0 +1,132 @@
package environment
import (
_ "embed"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/jobspec2"
"github.com/hashicorp/nomad/nomad/structs"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/logging"
"strconv"
)
// templateEnvironmentJobHCL holds our default job in HCL format.
// The default job is used when creating a new job and provides
// common settings that all jobs share.
//go:embed template-environment-job.hcl
var templateEnvironmentJobHCL string
var log = logging.GetLogger("environment")
// Manager encapsulates API calls to the executor API for creation and deletion of execution environments.
type Manager interface {
// Load fetches all previously created execution environments from the executor and registers them with the runner manager.
// It should be called during the startup process (e.g. on creation of the Manager).
Load() error
// CreateOrUpdate creates/updates an execution environment on the executor.
// If the job was created, the returned boolean is true, if it was updated, it is false.
// If err is not nil, that means the environment was neither created nor updated.
CreateOrUpdate(
id runner.EnvironmentID,
request dto.ExecutionEnvironmentRequest,
) (bool, error)
}
func NewNomadEnvironmentManager(
runnerManager runner.Manager,
apiClient nomad.ExecutorAPI,
) *NomadEnvironmentManager {
m := &NomadEnvironmentManager{runnerManager, apiClient, *parseJob(templateEnvironmentJobHCL)}
if err := m.Load(); err != nil {
log.WithError(err).Error("Error recovering the execution environments")
}
runnerManager.Load()
return m
}
type NomadEnvironmentManager struct {
runnerManager runner.Manager
api nomad.ExecutorAPI
templateEnvironmentJob nomadApi.Job
}
func (m *NomadEnvironmentManager) CreateOrUpdate(
id runner.EnvironmentID,
request dto.ExecutionEnvironmentRequest,
) (bool, error) {
templateJob, err := m.api.RegisterTemplateJob(&m.templateEnvironmentJob, runner.TemplateJobID(id),
request.PrewarmingPoolSize, request.CPULimit, request.MemoryLimit,
request.Image, request.NetworkAccess, request.ExposedPorts)
if err != nil {
return false, fmt.Errorf("error registering template job in API: %w", err)
}
created, err := m.runnerManager.CreateOrUpdateEnvironment(id, request.PrewarmingPoolSize, templateJob, true)
if err != nil {
return created, fmt.Errorf("error updating environment in runner manager: %w", err)
}
return created, nil
}
func (m *NomadEnvironmentManager) Load() error {
templateJobs, err := m.api.LoadEnvironmentJobs()
if err != nil {
return fmt.Errorf("couldn't load template jobs: %w", err)
}
for _, job := range templateJobs {
jobLogger := log.WithField("jobID", *job.ID)
if *job.Status != structs.JobStatusRunning {
jobLogger.Info("Job not running, skipping ...")
continue
}
configTaskGroup := nomad.FindConfigTaskGroup(job)
if configTaskGroup == nil {
jobLogger.Info("Couldn't find config task group in job, skipping ...")
continue
}
desiredIdleRunnersCount, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaPoolSizeKey])
if err != nil {
jobLogger.Infof("Couldn't convert pool size to int: %v, skipping ...", err)
continue
}
environmentIDString, err := runner.EnvironmentIDFromTemplateJobID(*job.ID)
if err != nil {
jobLogger.WithError(err).Error("Couldn't retrieve environment id from template job")
}
environmentID, err := runner.NewEnvironmentID(environmentIDString)
if err != nil {
jobLogger.WithField("environmentID", environmentIDString).
WithError(err).
Error("Couldn't retrieve environmentID from string")
continue
}
_, err = m.runnerManager.CreateOrUpdateEnvironment(environmentID, uint(desiredIdleRunnersCount), job, false)
if err != nil {
jobLogger.WithError(err).Info("Could not recover job.")
continue
}
jobLogger.Info("Successfully recovered environment")
}
return nil
}
func parseJob(jobHCL string) *nomadApi.Job {
config := jobspec2.ParseConfig{
Body: []byte(jobHCL),
AllowFS: false,
Strict: true,
}
job, err := jobspec2.ParseWithConfig(&config)
if err != nil {
log.WithError(err).Fatal("Error parsing Nomad job")
return nil
}
return job
}
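For illustration, a hypothetical usage sketch (not part of this commit) of the Manager interface: the returned boolean distinguishes creation from update, and a non-nil error means the environment was neither created nor updated. The package name, helper name, environment ID, and image value are invented.

// Package example is a hypothetical illustration and not part of this commit.
package example

import (
	"log"

	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/environment"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
)

// ensurePythonEnvironment creates or updates execution environment 10 with an example image.
func ensurePythonEnvironment(m environment.Manager) {
	created, err := m.CreateOrUpdate(runner.EnvironmentID(10), dto.ExecutionEnvironmentRequest{
		PrewarmingPoolSize: 5,
		CPULimit:           20,
		MemoryLimit:        30,
		Image:              "openhpi/co_execenv_python:3.8",
		NetworkAccess:      false,
	})
	if err != nil {
		log.Printf("environment was neither created nor updated: %v", err)
		return
	}
	if created {
		log.Println("environment created")
	} else {
		log.Println("environment updated")
	}
}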


@ -0,0 +1,55 @@
// Code generated by mockery v2.8.0. DO NOT EDIT.
package environment
import (
mock "github.com/stretchr/testify/mock"
dto "gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
runner "gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
)
// ManagerMock is an autogenerated mock type for the Manager type
type ManagerMock struct {
mock.Mock
}
// CreateOrUpdate provides a mock function with given fields: id, request
func (_m *ManagerMock) CreateOrUpdate(id runner.EnvironmentID, request dto.ExecutionEnvironmentRequest) (bool, error) {
ret := _m.Called(id, request)
var r0 bool
if rf, ok := ret.Get(0).(func(runner.EnvironmentID, dto.ExecutionEnvironmentRequest) bool); ok {
r0 = rf(id, request)
} else {
r0 = ret.Get(0).(bool)
}
var r1 error
if rf, ok := ret.Get(1).(func(runner.EnvironmentID, dto.ExecutionEnvironmentRequest) error); ok {
r1 = rf(id, request)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Delete provides a mock function with given fields: id
func (_m *ManagerMock) Delete(id string) {
_m.Called(id)
}
// Load provides a mock function with given fields:
func (_m *ManagerMock) Load() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
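For illustration, a hypothetical test sketch (not part of this commit) showing how the autogenerated ManagerMock is typically stubbed with testify's On/Return, mirroring the mock setup used elsewhere in this commit. The test name and the values are invented.

package environment

import (
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
)

// TestManagerMockSketch is a hypothetical example of stubbing the autogenerated mock in a test.
func TestManagerMockSketch(t *testing.T) {
	managerMock := &ManagerMock{}
	// Stub CreateOrUpdate to report that the environment was newly created.
	managerMock.On("CreateOrUpdate", mock.AnythingOfType("EnvironmentID"), mock.Anything).
		Return(true, nil)

	created, err := managerMock.CreateOrUpdate(runner.EnvironmentID(1), dto.ExecutionEnvironmentRequest{})
	assert.NoError(t, err)
	assert.True(t, created)
	managerMock.AssertExpectations(t)
}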


@ -0,0 +1,142 @@
package environment
import (
nomadApi "github.com/hashicorp/nomad/api"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/runner"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"testing"
)
type CreateOrUpdateTestSuite struct {
suite.Suite
runnerManagerMock runner.ManagerMock
apiMock nomad.ExecutorAPIMock
request dto.ExecutionEnvironmentRequest
manager *NomadEnvironmentManager
environmentID runner.EnvironmentID
}
func TestCreateOrUpdateTestSuite(t *testing.T) {
suite.Run(t, new(CreateOrUpdateTestSuite))
}
func (s *CreateOrUpdateTestSuite) SetupTest() {
s.runnerManagerMock = runner.ManagerMock{}
s.apiMock = nomad.ExecutorAPIMock{}
s.request = dto.ExecutionEnvironmentRequest{
PrewarmingPoolSize: 10,
CPULimit: 20,
MemoryLimit: 30,
Image: "my-image",
NetworkAccess: false,
ExposedPorts: nil,
}
s.manager = &NomadEnvironmentManager{
runnerManager: &s.runnerManagerMock,
api: &s.apiMock,
}
s.environmentID = runner.EnvironmentID(tests.DefaultEnvironmentIDAsInteger)
}
func (s *CreateOrUpdateTestSuite) mockRegisterTemplateJob(job *nomadApi.Job, err error) {
s.apiMock.On("RegisterTemplateJob",
mock.AnythingOfType("*api.Job"), mock.AnythingOfType("string"),
mock.AnythingOfType("uint"), mock.AnythingOfType("uint"), mock.AnythingOfType("uint"),
mock.AnythingOfType("string"), mock.AnythingOfType("bool"), mock.AnythingOfType("[]uint16")).
Return(job, err)
}
func (s *CreateOrUpdateTestSuite) mockCreateOrUpdateEnvironment(created bool, err error) {
s.runnerManagerMock.On("CreateOrUpdateEnvironment", mock.AnythingOfType("EnvironmentID"),
mock.AnythingOfType("uint"), mock.AnythingOfType("*api.Job"), mock.AnythingOfType("bool")).
Return(created, err)
}
func (s *CreateOrUpdateTestSuite) TestRegistersCorrectTemplateJob() {
s.mockRegisterTemplateJob(&nomadApi.Job{}, nil)
s.mockCreateOrUpdateEnvironment(true, nil)
_, err := s.manager.CreateOrUpdate(s.environmentID, s.request)
s.NoError(err)
s.apiMock.AssertCalled(s.T(), "RegisterTemplateJob",
&s.manager.templateEnvironmentJob, runner.TemplateJobID(s.environmentID),
s.request.PrewarmingPoolSize, s.request.CPULimit, s.request.MemoryLimit,
s.request.Image, s.request.NetworkAccess, s.request.ExposedPorts)
}
func (s *CreateOrUpdateTestSuite) TestReturnsErrorWhenRegisterTemplateJobReturnsError() {
s.mockRegisterTemplateJob(nil, tests.ErrDefault)
created, err := s.manager.CreateOrUpdate(s.environmentID, s.request)
s.ErrorIs(err, tests.ErrDefault)
s.False(created)
}
func (s *CreateOrUpdateTestSuite) TestCreatesOrUpdatesCorrectEnvironment() {
templateJobID := tests.DefaultJobID
templateJob := &nomadApi.Job{ID: &templateJobID}
s.mockRegisterTemplateJob(templateJob, nil)
s.mockCreateOrUpdateEnvironment(true, nil)
_, err := s.manager.CreateOrUpdate(s.environmentID, s.request)
s.NoError(err)
s.runnerManagerMock.AssertCalled(s.T(), "CreateOrUpdateEnvironment",
s.environmentID, s.request.PrewarmingPoolSize, templateJob, true)
}
func (s *CreateOrUpdateTestSuite) TestReturnsErrorIfCreatesOrUpdateEnvironmentReturnsError() {
s.mockRegisterTemplateJob(&nomadApi.Job{}, nil)
s.mockCreateOrUpdateEnvironment(false, tests.ErrDefault)
_, err := s.manager.CreateOrUpdate(runner.EnvironmentID(tests.DefaultEnvironmentIDAsInteger), s.request)
s.ErrorIs(err, tests.ErrDefault)
}
func (s *CreateOrUpdateTestSuite) TestReturnsTrueIfCreatesOrUpdateEnvironmentReturnsTrue() {
s.mockRegisterTemplateJob(&nomadApi.Job{}, nil)
s.mockCreateOrUpdateEnvironment(true, nil)
created, err := s.manager.CreateOrUpdate(runner.EnvironmentID(tests.DefaultEnvironmentIDAsInteger), s.request)
s.Require().NoError(err)
s.True(created)
}
func (s *CreateOrUpdateTestSuite) TestReturnsFalseIfCreatesOrUpdateEnvironmentReturnsFalse() {
s.mockRegisterTemplateJob(&nomadApi.Job{}, nil)
s.mockCreateOrUpdateEnvironment(false, nil)
created, err := s.manager.CreateOrUpdate(runner.EnvironmentID(tests.DefaultEnvironmentIDAsInteger), s.request)
s.Require().NoError(err)
s.False(created)
}
func TestParseJob(t *testing.T) {
exited := false
logger, hook := test.NewNullLogger()
logger.ExitFunc = func(i int) {
exited = true
}
log = logger.WithField("pkg", "nomad")
t.Run("parses the given default job", func(t *testing.T) {
job := parseJob(templateEnvironmentJobHCL)
assert.False(t, exited)
assert.NotNil(t, job)
})
t.Run("fatals when given wrong job", func(t *testing.T) {
job := parseJob("")
assert.True(t, exited)
assert.Nil(t, job)
assert.Equal(t, logrus.FatalLevel, hook.LastEntry().Level)
})
}


@ -0,0 +1,79 @@
// This is the default job configuration that is used when no path to another default configuration is given
job "template-0" {
datacenters = ["dc1"]
type = "batch"
group "default-group" {
ephemeral_disk {
migrate = false
size = 10
sticky = false
}
count = 1
scaling {
enabled = true
min = 0
max = 300
}
spread {
// see https://www.nomadproject.io/docs/job-specification/spread#even-spread-across-data-center
// This spreads the load evenly amongst our nodes
attribute = "${node.unique.name}"
weight = 100
}
task "default-task" {
driver = "docker"
kill_timeout = "0s"
kill_signal = "SIGKILL"
config {
image = "drp.codemoon.xopic.de/openhpi/co_execenv_python:3.8"
command = "sleep"
args = ["infinity"]
network_mode = "none"
}
logs {
max_files = 1
max_file_size = 1
}
resources {
cpu = 40
memory = 40
}
restart {
delay = "0s"
}
}
}
group "config" {
// We want to store whether a task is in use in order to recover from a downtime.
// Without a separate config task, marking a task as used would result in a restart of that task,
// as the meta information is passed to the container as environment variables.
count = 0
task "config" {
driver = "exec"
config {
command = "true"
}
logs {
max_files = 1
max_file_size = 1
}
resources {
// minimum values
cpu = 1
memory = 10
}
}
meta {
used = "false"
prewarmingPoolSize = "0"
}
}
}


@ -0,0 +1,213 @@
package nomad
import (
"context"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"io"
"net/url"
)
var (
ErrorNoAllocationFound = errors.New("no allocation found")
)
// apiQuerier provides access to the Nomad functionality.
type apiQuerier interface {
// init prepares an apiClient to be able to communicate to a provided Nomad API.
init(nomadURL *url.URL, nomadNamespace string) (err error)
// LoadJobList loads the list of jobs from the Nomad API.
LoadJobList() (list []*nomadApi.JobListStub, err error)
// JobScale returns the scale of the passed job.
JobScale(jobID string) (jobScale uint, err error)
// SetJobScale sets the scaling count of the passed job to Nomad.
SetJobScale(jobID string, count uint, reason string) (err error)
// DeleteRunner deletes the runner with the given ID.
DeleteRunner(runnerID string) (err error)
// Execute runs a command in the passed job.
Execute(jobID string, ctx context.Context, command []string, tty bool,
stdin io.Reader, stdout, stderr io.Writer) (int, error)
// listJobs loads all jobs with the specified prefix.
listJobs(prefix string) (allocationListStub []*nomadApi.JobListStub, err error)
// job returns the job of the given jobID.
job(jobID string) (job *nomadApi.Job, err error)
// allocation returns the first allocation of the given job.
allocation(jobID string) (*nomadApi.Allocation, error)
// RegisterNomadJob registers a job with Nomad.
// It returns the evaluation ID that can be used when listening to the Nomad event stream.
RegisterNomadJob(job *nomadApi.Job) (string, error)
// EvaluationStream returns a Nomad event stream filtered to return only events belonging to the
// given evaluation ID.
EvaluationStream(evalID string, ctx context.Context) (<-chan *nomadApi.Events, error)
// AllocationStream returns a Nomad event stream filtered to return only allocation events.
AllocationStream(ctx context.Context) (<-chan *nomadApi.Events, error)
}
// nomadAPIClient implements the apiQuerier interface and provides access to a real Nomad API.
type nomadAPIClient struct {
client *nomadApi.Client
namespace string
}
func (nc *nomadAPIClient) init(nomadURL *url.URL, nomadNamespace string) (err error) {
nc.client, err = nomadApi.NewClient(&nomadApi.Config{
Address: nomadURL.String(),
TLSConfig: &nomadApi.TLSConfig{},
Namespace: nomadNamespace,
})
if err != nil {
return fmt.Errorf("error creating new Nomad client: %w", err)
}
nc.namespace = nomadNamespace
return nil
}
func (nc *nomadAPIClient) DeleteRunner(runnerID string) (err error) {
_, _, err = nc.client.Jobs().Deregister(runnerID, true, nc.writeOptions())
return
}
func (nc *nomadAPIClient) Execute(runnerID string,
ctx context.Context, command []string, tty bool,
stdin io.Reader, stdout, stderr io.Writer,
) (int, error) {
allocations, _, err := nc.client.Jobs().Allocations(runnerID, false, nil)
if err != nil {
return 1, fmt.Errorf("error retrieving allocations for runner: %w", err)
}
if len(allocations) == 0 {
return 1, ErrorNoAllocationFound
}
allocation, _, err := nc.client.Allocations().Info(allocations[0].ID, nil)
if err != nil {
return 1, fmt.Errorf("error retrieving allocation info: %w", err)
}
exitCode, err := nc.client.Allocations().Exec(ctx, allocation, TaskName, tty, command, stdin, stdout, stderr, nil, nil)
if err != nil {
return 1, fmt.Errorf("error executing command in allocation: %w", err)
}
return exitCode, nil
}
func (nc *nomadAPIClient) listJobs(prefix string) ([]*nomadApi.JobListStub, error) {
q := nomadApi.QueryOptions{
Namespace: nc.namespace,
Prefix: prefix,
}
jobs, _, err := nc.client.Jobs().List(&q)
if err != nil {
return nil, fmt.Errorf("error listing Nomad jobs: %w", err)
}
return jobs, nil
}
func (nc *nomadAPIClient) RegisterNomadJob(job *nomadApi.Job) (string, error) {
job.Namespace = &nc.namespace
resp, _, err := nc.client.Jobs().Register(job, nil)
if err != nil {
return "", fmt.Errorf("error registering Nomad job: %w", err)
}
if resp.Warnings != "" {
log.
WithField("job", job).
WithField("warnings", resp.Warnings).
Warn("Received warnings when registering job")
}
return resp.EvalID, nil
}
func (nc *nomadAPIClient) EvaluationStream(evalID string, ctx context.Context) (<-chan *nomadApi.Events, error) {
stream, err := nc.client.EventStream().Stream(
ctx,
map[nomadApi.Topic][]string{
nomadApi.TopicEvaluation: {evalID},
},
0,
nc.queryOptions())
if err != nil {
return nil, fmt.Errorf("error retrieving Nomad Evaluation event stream: %w", err)
}
return stream, nil
}
func (nc *nomadAPIClient) AllocationStream(ctx context.Context) (<-chan *nomadApi.Events, error) {
stream, err := nc.client.EventStream().Stream(
ctx,
map[nomadApi.Topic][]string{
nomadApi.TopicAllocation: {},
},
0,
nc.queryOptions())
if err != nil {
return nil, fmt.Errorf("error retrieving Nomad Allocation event stream: %w", err)
}
return stream, nil
}
func (nc *nomadAPIClient) queryOptions() *nomadApi.QueryOptions {
return &nomadApi.QueryOptions{
Namespace: nc.namespace,
}
}
func (nc *nomadAPIClient) writeOptions() *nomadApi.WriteOptions {
return &nomadApi.WriteOptions{
Namespace: nc.namespace,
}
}
// LoadJobList loads the list of jobs from the Nomad api.
func (nc *nomadAPIClient) LoadJobList() (list []*nomadApi.JobListStub, err error) {
list, _, err = nc.client.Jobs().List(nc.queryOptions())
return
}
// JobScale returns the scale of the passed job.
func (nc *nomadAPIClient) JobScale(jobID string) (uint, error) {
status, _, err := nc.client.Jobs().ScaleStatus(jobID, nc.queryOptions())
if err != nil {
return 0, fmt.Errorf("error retrieving scale status of job: %w", err)
}
// TODO: Consider also counting the placed and desired allocations.
jobScale := uint(status.TaskGroups[TaskGroupName].Running)
return jobScale, nil
}
// SetJobScale sets the scaling count of the passed job to Nomad.
func (nc *nomadAPIClient) SetJobScale(jobID string, count uint, reason string) (err error) {
intCount := int(count)
_, _, err = nc.client.Jobs().Scale(jobID, TaskGroupName, &intCount, reason, false, nil, nil)
return
}
func (nc *nomadAPIClient) job(jobID string) (job *nomadApi.Job, err error) {
job, _, err = nc.client.Jobs().Info(jobID, nil)
return
}
func (nc *nomadAPIClient) allocation(jobID string) (alloc *nomadApi.Allocation, err error) {
allocs, _, err := nc.client.Jobs().Allocations(jobID, false, nil)
if err != nil {
return nil, fmt.Errorf("error requesting Nomad job allocations: %w", err)
}
if len(allocs) == 0 {
return nil, ErrorNoAllocationFound
}
alloc, _, err = nc.client.Allocations().Info(allocs[0].ID, nil)
if err != nil {
return nil, fmt.Errorf("error requesting Nomad allocation info: %w", err)
}
return alloc, nil
}
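For illustration, a hedged sketch (not part of this commit) of how the unexported Nomad API client above could be initialized and used to list jobs. The helper name is invented and, because the type is unexported, it would have to live inside the nomad package; URL and namespace are passed in rather than taken from the configuration.

package nomad

import (
	"fmt"
	"net/url"
)

// exampleLoadJobList is a hypothetical helper: it initializes the client and prints all job IDs.
func exampleLoadJobList(apiURL *url.URL, namespace string) error {
	client := &nomadAPIClient{}
	// init builds the underlying Nomad API client for the given URL and namespace.
	if err := client.init(apiURL, namespace); err != nil {
		return err
	}
	jobs, err := client.LoadJobList()
	if err != nil {
		return err
	}
	for _, job := range jobs {
		fmt.Println(job.ID)
	}
	return nil
}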


@ -0,0 +1,263 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package nomad
import (
context "context"
api "github.com/hashicorp/nomad/api"
io "io"
mock "github.com/stretchr/testify/mock"
url "net/url"
)
// apiQuerierMock is an autogenerated mock type for the apiQuerier type
type apiQuerierMock struct {
mock.Mock
}
// AllocationStream provides a mock function with given fields: ctx
func (_m *apiQuerierMock) AllocationStream(ctx context.Context) (<-chan *api.Events, error) {
ret := _m.Called(ctx)
var r0 <-chan *api.Events
if rf, ok := ret.Get(0).(func(context.Context) <-chan *api.Events); ok {
r0 = rf(ctx)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan *api.Events)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(context.Context) error); ok {
r1 = rf(ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// DeleteRunner provides a mock function with given fields: runnerId
func (_m *apiQuerierMock) DeleteRunner(runnerId string) error {
ret := _m.Called(runnerId)
var r0 error
if rf, ok := ret.Get(0).(func(string) error); ok {
r0 = rf(runnerId)
} else {
r0 = ret.Error(0)
}
return r0
}
// EvaluationStream provides a mock function with given fields: evalID, ctx
func (_m *apiQuerierMock) EvaluationStream(evalID string, ctx context.Context) (<-chan *api.Events, error) {
ret := _m.Called(evalID, ctx)
var r0 <-chan *api.Events
if rf, ok := ret.Get(0).(func(string, context.Context) <-chan *api.Events); ok {
r0 = rf(evalID, ctx)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan *api.Events)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context) error); ok {
r1 = rf(evalID, ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Execute provides a mock function with given fields: jobID, ctx, command, tty, stdin, stdout, stderr
func (_m *apiQuerierMock) Execute(jobID string, ctx context.Context, command []string, tty bool, stdin io.Reader, stdout io.Writer, stderr io.Writer) (int, error) {
ret := _m.Called(jobID, ctx, command, tty, stdin, stdout, stderr)
var r0 int
if rf, ok := ret.Get(0).(func(string, context.Context, []string, bool, io.Reader, io.Writer, io.Writer) int); ok {
r0 = rf(jobID, ctx, command, tty, stdin, stdout, stderr)
} else {
r0 = ret.Get(0).(int)
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context, []string, bool, io.Reader, io.Writer, io.Writer) error); ok {
r1 = rf(jobID, ctx, command, tty, stdin, stdout, stderr)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// JobScale provides a mock function with given fields: jobID
func (_m *apiQuerierMock) JobScale(jobId string) (uint, error) {
ret := _m.Called(jobId)
var r0 uint
if rf, ok := ret.Get(0).(func(string) uint); ok {
r0 = rf(jobId)
} else {
r0 = ret.Get(0).(uint)
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(jobId)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadJobList provides a mock function with given fields:
func (_m *apiQuerierMock) LoadJobList() ([]*api.JobListStub, error) {
ret := _m.Called()
var r0 []*api.JobListStub
if rf, ok := ret.Get(0).(func() []*api.JobListStub); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*api.JobListStub)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// RegisterNomadJob provides a mock function with given fields: job
func (_m *apiQuerierMock) RegisterNomadJob(job *api.Job) (string, error) {
ret := _m.Called(job)
var r0 string
if rf, ok := ret.Get(0).(func(*api.Job) string); ok {
r0 = rf(job)
} else {
r0 = ret.Get(0).(string)
}
var r1 error
if rf, ok := ret.Get(1).(func(*api.Job) error); ok {
r1 = rf(job)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// SetJobScale provides a mock function with given fields: jobID, count, reason
func (_m *apiQuerierMock) SetJobScale(jobId string, count uint, reason string) error {
ret := _m.Called(jobId, count, reason)
var r0 error
if rf, ok := ret.Get(0).(func(string, uint, string) error); ok {
r0 = rf(jobId, count, reason)
} else {
r0 = ret.Error(0)
}
return r0
}
// allocation provides a mock function with given fields: jobID
func (_m *apiQuerierMock) allocation(jobID string) (*api.Allocation, error) {
ret := _m.Called(jobID)
var r0 *api.Allocation
if rf, ok := ret.Get(0).(func(string) *api.Allocation); ok {
r0 = rf(jobID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*api.Allocation)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(jobID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// init provides a mock function with given fields: nomadURL, nomadNamespace
func (_m *apiQuerierMock) init(nomadURL *url.URL, nomadNamespace string) error {
ret := _m.Called(nomadURL, nomadNamespace)
var r0 error
if rf, ok := ret.Get(0).(func(*url.URL, string) error); ok {
r0 = rf(nomadURL, nomadNamespace)
} else {
r0 = ret.Error(0)
}
return r0
}
// job provides a mock function with given fields: jobID
func (_m *apiQuerierMock) job(jobID string) (*api.Job, error) {
ret := _m.Called(jobID)
var r0 *api.Job
if rf, ok := ret.Get(0).(func(string) *api.Job); ok {
r0 = rf(jobID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*api.Job)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(jobID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// listJobs provides a mock function with given fields: prefix
func (_m *apiQuerierMock) listJobs(prefix string) ([]*api.JobListStub, error) {
ret := _m.Called(prefix)
var r0 []*api.JobListStub
if rf, ok := ret.Get(0).(func(string) []*api.JobListStub); ok {
r0 = rf(prefix)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*api.JobListStub)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(prefix)
} else {
r1 = ret.Error(1)
}
return r0, r1
}


@ -0,0 +1,455 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package nomad
import (
context "context"
api "github.com/hashicorp/nomad/api"
io "io"
mock "github.com/stretchr/testify/mock"
url "net/url"
)
// ExecutorAPIMock is an autogenerated mock type for the ExecutorAPI type
type ExecutorAPIMock struct {
mock.Mock
}
// AllocationStream provides a mock function with given fields: ctx
func (_m *ExecutorAPIMock) AllocationStream(ctx context.Context) (<-chan *api.Events, error) {
ret := _m.Called(ctx)
var r0 <-chan *api.Events
if rf, ok := ret.Get(0).(func(context.Context) <-chan *api.Events); ok {
r0 = rf(ctx)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan *api.Events)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(context.Context) error); ok {
r1 = rf(ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// DeleteRunner provides a mock function with given fields: runnerId
func (_m *ExecutorAPIMock) DeleteRunner(runnerId string) error {
ret := _m.Called(runnerId)
var r0 error
if rf, ok := ret.Get(0).(func(string) error); ok {
r0 = rf(runnerId)
} else {
r0 = ret.Error(0)
}
return r0
}
// EvaluationStream provides a mock function with given fields: evalID, ctx
func (_m *ExecutorAPIMock) EvaluationStream(evalID string, ctx context.Context) (<-chan *api.Events, error) {
ret := _m.Called(evalID, ctx)
var r0 <-chan *api.Events
if rf, ok := ret.Get(0).(func(string, context.Context) <-chan *api.Events); ok {
r0 = rf(evalID, ctx)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan *api.Events)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context) error); ok {
r1 = rf(evalID, ctx)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Execute provides a mock function with given fields: jobID, ctx, command, tty, stdin, stdout, stderr
func (_m *ExecutorAPIMock) Execute(jobID string, ctx context.Context, command []string, tty bool, stdin io.Reader, stdout io.Writer, stderr io.Writer) (int, error) {
ret := _m.Called(jobID, ctx, command, tty, stdin, stdout, stderr)
var r0 int
if rf, ok := ret.Get(0).(func(string, context.Context, []string, bool, io.Reader, io.Writer, io.Writer) int); ok {
r0 = rf(jobID, ctx, command, tty, stdin, stdout, stderr)
} else {
r0 = ret.Get(0).(int)
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context, []string, bool, io.Reader, io.Writer, io.Writer) error); ok {
r1 = rf(jobID, ctx, command, tty, stdin, stdout, stderr)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// ExecuteCommand provides a mock function with given fields: allocationID, ctx, command, tty, stdin, stdout, stderr
func (_m *ExecutorAPIMock) ExecuteCommand(allocationID string, ctx context.Context, command []string, tty bool, stdin io.Reader, stdout io.Writer, stderr io.Writer) (int, error) {
ret := _m.Called(allocationID, ctx, command, tty, stdin, stdout, stderr)
var r0 int
if rf, ok := ret.Get(0).(func(string, context.Context, []string, bool, io.Reader, io.Writer, io.Writer) int); ok {
r0 = rf(allocationID, ctx, command, tty, stdin, stdout, stderr)
} else {
r0 = ret.Get(0).(int)
}
var r1 error
if rf, ok := ret.Get(1).(func(string, context.Context, []string, bool, io.Reader, io.Writer, io.Writer) error); ok {
r1 = rf(allocationID, ctx, command, tty, stdin, stdout, stderr)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// JobScale provides a mock function with given fields: jobID
func (_m *ExecutorAPIMock) JobScale(jobId string) (uint, error) {
ret := _m.Called(jobId)
var r0 uint
if rf, ok := ret.Get(0).(func(string) uint); ok {
r0 = rf(jobId)
} else {
r0 = ret.Get(0).(uint)
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(jobId)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadEnvironmentJobs provides a mock function with given fields:
func (_m *ExecutorAPIMock) LoadEnvironmentJobs() ([]*api.Job, error) {
ret := _m.Called()
var r0 []*api.Job
if rf, ok := ret.Get(0).(func() []*api.Job); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*api.Job)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadJobList provides a mock function with given fields:
func (_m *ExecutorAPIMock) LoadJobList() ([]*api.JobListStub, error) {
ret := _m.Called()
var r0 []*api.JobListStub
if rf, ok := ret.Get(0).(func() []*api.JobListStub); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*api.JobListStub)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadRunnerIDs provides a mock function with given fields: environmentID
func (_m *ExecutorAPIMock) LoadRunnerIDs(environmentID string) ([]string, error) {
ret := _m.Called(environmentID)
var r0 []string
if rf, ok := ret.Get(0).(func(string) []string); ok {
r0 = rf(environmentID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]string)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(environmentID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadRunnerJobs provides a mock function with given fields: environmentID
func (_m *ExecutorAPIMock) LoadRunnerJobs(environmentID string) ([]*api.Job, error) {
ret := _m.Called(environmentID)
var r0 []*api.Job
if rf, ok := ret.Get(0).(func(string) []*api.Job); ok {
r0 = rf(environmentID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*api.Job)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(environmentID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadRunnerPorts provides a mock function with given fields: runnerID
func (_m *ExecutorAPIMock) LoadRunnerPortMappings(runnerID string) ([]api.PortMapping, error) {
ret := _m.Called(runnerID)
var r0 []api.PortMapping
if rf, ok := ret.Get(0).(func(string) []api.PortMapping); ok {
r0 = rf(runnerID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]api.PortMapping)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(runnerID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// MarkRunnerAsUsed provides a mock function with given fields: runnerID, duration
func (_m *ExecutorAPIMock) MarkRunnerAsUsed(runnerID string, duration int) error {
ret := _m.Called(runnerID, duration)
var r0 error
if rf, ok := ret.Get(0).(func(string, int) error); ok {
r0 = rf(runnerID, duration)
} else {
r0 = ret.Error(0)
}
return r0
}
// MonitorEvaluation provides a mock function with given fields: evaluationID, ctx
func (_m *ExecutorAPIMock) MonitorEvaluation(evaluationID string, ctx context.Context) error {
ret := _m.Called(evaluationID, ctx)
var r0 error
if rf, ok := ret.Get(0).(func(string, context.Context) error); ok {
r0 = rf(evaluationID, ctx)
} else {
r0 = ret.Error(0)
}
return r0
}
// RegisterNomadJob provides a mock function with given fields: job
func (_m *ExecutorAPIMock) RegisterNomadJob(job *api.Job) (string, error) {
ret := _m.Called(job)
var r0 string
if rf, ok := ret.Get(0).(func(*api.Job) string); ok {
r0 = rf(job)
} else {
r0 = ret.Get(0).(string)
}
var r1 error
if rf, ok := ret.Get(1).(func(*api.Job) error); ok {
r1 = rf(job)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// RegisterRunnerJob provides a mock function with given fields: template
func (_m *ExecutorAPIMock) RegisterRunnerJob(template *api.Job) error {
ret := _m.Called(template)
var r0 error
if rf, ok := ret.Get(0).(func(*api.Job) error); ok {
r0 = rf(template)
} else {
r0 = ret.Error(0)
}
return r0
}
// RegisterTemplateJob provides a mock function with given fields: defaultJob, id, prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts
func (_m *ExecutorAPIMock) RegisterTemplateJob(defaultJob *api.Job, id string, prewarmingPoolSize uint, cpuLimit uint, memoryLimit uint, image string, networkAccess bool, exposedPorts []uint16) (*api.Job, error) {
ret := _m.Called(defaultJob, id, prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
var r0 *api.Job
if rf, ok := ret.Get(0).(func(*api.Job, string, uint, uint, uint, string, bool, []uint16) *api.Job); ok {
r0 = rf(defaultJob, id, prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*api.Job)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(*api.Job, string, uint, uint, uint, string, bool, []uint16) error); ok {
r1 = rf(defaultJob, id, prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// SetJobScale provides a mock function with given fields: jobID, count, reason
func (_m *ExecutorAPIMock) SetJobScale(jobId string, count uint, reason string) error {
ret := _m.Called(jobId, count, reason)
var r0 error
if rf, ok := ret.Get(0).(func(string, uint, string) error); ok {
r0 = rf(jobId, count, reason)
} else {
r0 = ret.Error(0)
}
return r0
}
// WatchAllocations provides a mock function with given fields: ctx, onNewAllocation, onDeletedAllocation
func (_m *ExecutorAPIMock) WatchAllocations(ctx context.Context, onNewAllocation AllocationProcessor, onDeletedAllocation AllocationProcessor) error {
ret := _m.Called(ctx, onNewAllocation, onDeletedAllocation)
var r0 error
if rf, ok := ret.Get(0).(func(context.Context, AllocationProcessor, AllocationProcessor) error); ok {
r0 = rf(ctx, onNewAllocation, onDeletedAllocation)
} else {
r0 = ret.Error(0)
}
return r0
}
// allocation provides a mock function with given fields: jobID
func (_m *ExecutorAPIMock) allocation(jobID string) (*api.Allocation, error) {
ret := _m.Called(jobID)
var r0 *api.Allocation
if rf, ok := ret.Get(0).(func(string) *api.Allocation); ok {
r0 = rf(jobID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*api.Allocation)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(jobID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// init provides a mock function with given fields: nomadURL, nomadNamespace
func (_m *ExecutorAPIMock) init(nomadURL *url.URL, nomadNamespace string) error {
ret := _m.Called(nomadURL, nomadNamespace)
var r0 error
if rf, ok := ret.Get(0).(func(*url.URL, string) error); ok {
r0 = rf(nomadURL, nomadNamespace)
} else {
r0 = ret.Error(0)
}
return r0
}
// job provides a mock function with given fields: jobID
func (_m *ExecutorAPIMock) job(jobID string) (*api.Job, error) {
ret := _m.Called(jobID)
var r0 *api.Job
if rf, ok := ret.Get(0).(func(string) *api.Job); ok {
r0 = rf(jobID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*api.Job)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(jobID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// listJobs provides a mock function with given fields: prefix
func (_m *ExecutorAPIMock) listJobs(prefix string) ([]*api.JobListStub, error) {
ret := _m.Called(prefix)
var r0 []*api.JobListStub
if rf, ok := ret.Get(0).(func(string) []*api.JobListStub); ok {
r0 = rf(prefix)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*api.JobListStub)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(prefix)
} else {
r1 = ret.Error(1)
}
return r0, r1
}

internal/nomad/job.go Normal file

@ -0,0 +1,237 @@
package nomad
import (
"context"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"strconv"
)
const (
TemplateJobPrefix = "template"
TaskGroupName = "default-group"
TaskName = "default-task"
TaskCount = 1
TaskDriver = "docker"
TaskCommand = "sleep"
ConfigTaskGroupName = "config"
ConfigTaskName = "config"
ConfigTaskDriver = "exec"
ConfigTaskCommand = "true"
ConfigMetaUsedKey = "used"
ConfigMetaUsedValue = "true"
ConfigMetaUnusedValue = "false"
ConfigMetaTimeoutKey = "timeout"
ConfigMetaPoolSizeKey = "prewarmingPoolSize"
)
var (
TaskArgs = []string{"infinity"}
ErrorConfigTaskGroupNotFound = errors.New("config task group not found in job")
)
// FindConfigTaskGroup returns the config task group of a job.
// The config task group should be included in all jobs.
func FindConfigTaskGroup(job *nomadApi.Job) *nomadApi.TaskGroup {
for _, tg := range job.TaskGroups {
if *tg.Name == ConfigTaskGroupName {
return tg
}
}
return nil
}
func SetMetaConfigValue(job *nomadApi.Job, key, value string) error {
configTaskGroup := FindConfigTaskGroup(job)
if configTaskGroup == nil {
return ErrorConfigTaskGroupNotFound
}
configTaskGroup.Meta[key] = value
return nil
}
// RegisterTemplateJob creates a Nomad job based on the default job configuration and the given parameters.
// It registers the job with Nomad and waits until the registration completes.
func (a *APIClient) RegisterTemplateJob(
basisJob *nomadApi.Job,
id string,
prewarmingPoolSize, cpuLimit, memoryLimit uint,
image string,
networkAccess bool,
exposedPorts []uint16) (*nomadApi.Job, error) {
job := CreateTemplateJob(basisJob, id, prewarmingPoolSize,
cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
evalID, err := a.apiQuerier.RegisterNomadJob(job)
if err != nil {
return nil, fmt.Errorf("couldn't register template job: %w", err)
}
return job, a.MonitorEvaluation(evalID, context.Background())
}
// CreateTemplateJob builds a Nomad job based on the default job configuration and the given parameters.
// Unlike RegisterTemplateJob, it only constructs the job and does not register it with Nomad.
func CreateTemplateJob(
basisJob *nomadApi.Job,
id string,
prewarmingPoolSize, cpuLimit, memoryLimit uint,
image string,
networkAccess bool,
exposedPorts []uint16) *nomadApi.Job {
job := *basisJob
job.ID = &id
job.Name = &id
var taskGroup = createTaskGroup(&job, TaskGroupName)
configureTask(taskGroup, TaskName, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
storeTemplateConfiguration(&job, prewarmingPoolSize)
return &job
}
func (a *APIClient) RegisterRunnerJob(template *nomadApi.Job) error {
storeRunnerConfiguration(template)
evalID, err := a.apiQuerier.RegisterNomadJob(template)
if err != nil {
return fmt.Errorf("couldn't register runner job: %w", err)
}
return a.MonitorEvaluation(evalID, context.Background())
}
func createTaskGroup(job *nomadApi.Job, name string) *nomadApi.TaskGroup {
var taskGroup *nomadApi.TaskGroup
if len(job.TaskGroups) == 0 {
taskGroup = nomadApi.NewTaskGroup(name, TaskCount)
job.TaskGroups = []*nomadApi.TaskGroup{taskGroup}
} else {
taskGroup = job.TaskGroups[0]
taskGroup.Name = &name
count := TaskCount
taskGroup.Count = &count
}
return taskGroup
}
func configureNetwork(taskGroup *nomadApi.TaskGroup, networkAccess bool, exposedPorts []uint16) {
if len(taskGroup.Tasks) == 0 {
// This function is only used internally and must be called as the last step when configuring the task.
// This error is not recoverable.
log.Fatal("Can't configure network before task has been configured!")
}
task := taskGroup.Tasks[0]
if task.Config == nil {
task.Config = make(map[string]interface{})
}
if networkAccess {
var networkResource *nomadApi.NetworkResource
if len(taskGroup.Networks) == 0 {
networkResource = &nomadApi.NetworkResource{}
taskGroup.Networks = []*nomadApi.NetworkResource{networkResource}
} else {
networkResource = taskGroup.Networks[0]
}
// Prefer "bridge" network over "host" to have an isolated network namespace with bridged interface
// instead of joining the host network namespace.
networkResource.Mode = "bridge"
for _, portNumber := range exposedPorts {
port := nomadApi.Port{
Label: strconv.FormatUint(uint64(portNumber), 10),
To: int(portNumber),
}
networkResource.DynamicPorts = append(networkResource.DynamicPorts, port)
}
// Explicitly set the mode to override any existing setting when updating a job from no network access to network access.
// Don't use "bridge" here: that would make Docker fall back to its own 'bridge' mode, attaching all
// allocations to the `docker0` adapter where they could reach other non-Nomad containers attached to it.
// Nomad's bridge network mode (configured above) avoids this.
task.Config["network_mode"] = ""
} else {
// Somehow, we can't set the network mode to none in the NetworkResource on task group level.
// See https://github.com/hashicorp/nomad/issues/10540
task.Config["network_mode"] = "none"
// Explicitly set Networks to signal Nomad to remove the possibly existing networkResource
taskGroup.Networks = []*nomadApi.NetworkResource{}
}
}
func configureTask(
taskGroup *nomadApi.TaskGroup,
name string,
cpuLimit, memoryLimit uint,
image string,
networkAccess bool,
exposedPorts []uint16) {
var task *nomadApi.Task
if len(taskGroup.Tasks) == 0 {
task = nomadApi.NewTask(name, TaskDriver)
taskGroup.Tasks = []*nomadApi.Task{task}
} else {
task = taskGroup.Tasks[0]
task.Name = name
}
integerCPULimit := int(cpuLimit)
integerMemoryLimit := int(memoryLimit)
if task.Resources == nil {
task.Resources = nomadApi.DefaultResources()
}
task.Resources.CPU = &integerCPULimit
task.Resources.MemoryMB = &integerMemoryLimit
if task.Config == nil {
task.Config = make(map[string]interface{})
}
task.Config["image"] = image
task.Config["command"] = TaskCommand
task.Config["args"] = TaskArgs
configureNetwork(taskGroup, networkAccess, exposedPorts)
}
func storeTemplateConfiguration(job *nomadApi.Job, prewarmingPoolSize uint) {
taskGroup := findOrCreateConfigTaskGroup(job)
taskGroup.Meta = make(map[string]string)
taskGroup.Meta[ConfigMetaPoolSizeKey] = strconv.Itoa(int(prewarmingPoolSize))
}
func storeRunnerConfiguration(job *nomadApi.Job) {
taskGroup := findOrCreateConfigTaskGroup(job)
taskGroup.Meta = make(map[string]string)
taskGroup.Meta[ConfigMetaUsedKey] = ConfigMetaUnusedValue
}
func findOrCreateConfigTaskGroup(job *nomadApi.Job) *nomadApi.TaskGroup {
taskGroup := FindConfigTaskGroup(job)
if taskGroup == nil {
taskGroup = nomadApi.NewTaskGroup(ConfigTaskGroupName, 0)
job.AddTaskGroup(taskGroup)
}
createConfigTaskIfNotPresent(taskGroup)
return taskGroup
}
// createConfigTaskIfNotPresent ensures that a dummy task is in the task group so that the group is accepted by Nomad.
func createConfigTaskIfNotPresent(taskGroup *nomadApi.TaskGroup) {
var task *nomadApi.Task
for _, t := range taskGroup.Tasks {
if t.Name == ConfigTaskName {
task = t
break
}
}
if task == nil {
task = nomadApi.NewTask(ConfigTaskName, ConfigTaskDriver)
taskGroup.Tasks = append(taskGroup.Tasks, task)
}
if task.Config == nil {
task.Config = make(map[string]interface{})
}
task.Config["command"] = ConfigTaskCommand
}
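For illustration, a hypothetical sketch (not part of this commit) that derives a template job from a basis job without registering it, then reads back the stored prewarming pool size via the config task group. The job IDs and the image are example values, and the main function is invented.

package main

import (
	"fmt"

	nomadApi "github.com/hashicorp/nomad/api"
	"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
)

func main() {
	// A minimal basis job standing in for the embedded template-environment-job.hcl.
	basisJob := nomadApi.NewBatchJob("template-0", "template-0", "global", 50)

	// Build the template job: 5 prewarmed runners, 20 MHz CPU, 30 MB memory, no network access.
	job := nomad.CreateTemplateJob(basisJob, "template-10", 5, 20, 30,
		"openhpi/co_execenv_python:3.8", false, nil)

	// The prewarming pool size is stored as meta information on the config task group.
	configTaskGroup := nomad.FindConfigTaskGroup(job)
	fmt.Println(configTaskGroup.Meta[nomad.ConfigMetaPoolSizeKey]) // "5"
}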

internal/nomad/job_test.go Normal file

@ -0,0 +1,279 @@
package nomad
import (
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests/helpers"
"strconv"
"testing"
)
func createTestTaskGroup() *nomadApi.TaskGroup {
return nomadApi.NewTaskGroup("taskGroup", 1)
}
func createTestTask() *nomadApi.Task {
return nomadApi.NewTask("task", "docker")
}
func createTestResources() *nomadApi.Resources {
result := nomadApi.DefaultResources()
expectedCPULimit := 1337
expectedMemoryLimit := 42
result.CPU = &expectedCPULimit
result.MemoryMB = &expectedMemoryLimit
return result
}
func TestCreateTaskGroupCreatesNewTaskGroupWhenJobHasNoTaskGroup(t *testing.T) {
job := nomadApi.NewBatchJob("test", "test", "test", 1)
if assert.Equal(t, 0, len(job.TaskGroups)) {
expectedTaskGroup := createTestTaskGroup()
taskGroup := createTaskGroup(job, *expectedTaskGroup.Name)
assert.Equal(t, *expectedTaskGroup, *taskGroup)
assert.Equal(t, []*nomadApi.TaskGroup{taskGroup}, job.TaskGroups, "it should add the task group to the job")
}
}
func TestCreateTaskGroupOverwritesOptionsWhenJobHasTaskGroup(t *testing.T) {
job := nomadApi.NewBatchJob("test", "test", "test", 1)
existingTaskGroup := createTestTaskGroup()
existingTaskGroup.Meta = map[string]string{"field": "should still exist"}
newTaskGroupList := []*nomadApi.TaskGroup{existingTaskGroup}
job.TaskGroups = newTaskGroupList
newName := *existingTaskGroup.Name + "longerName"
taskGroup := createTaskGroup(job, newName)
// create a new copy to avoid changing the original one as it is a pointer
expectedTaskGroup := *existingTaskGroup
expectedTaskGroup.Name = &newName
assert.Equal(t, expectedTaskGroup, *taskGroup)
assert.Equal(t, newTaskGroupList, job.TaskGroups, "it should not modify the job's task group list")
}
func TestConfigureNetworkFatalsWhenNoTaskExists(t *testing.T) {
logger, hook := test.NewNullLogger()
logger.ExitFunc = func(i int) {
panic(i)
}
log = logger.WithField("pkg", "job_test")
taskGroup := createTestTaskGroup()
if assert.Equal(t, 0, len(taskGroup.Tasks)) {
assert.Panics(t, func() {
configureNetwork(taskGroup, false, nil)
})
assert.Equal(t, logrus.FatalLevel, hook.LastEntry().Level)
}
}
func TestConfigureNetworkCreatesNewNetworkWhenNoNetworkExists(t *testing.T) {
taskGroup := createTestTaskGroup()
task := createTestTask()
taskGroup.Tasks = []*nomadApi.Task{task}
if assert.Equal(t, 0, len(taskGroup.Networks)) {
configureNetwork(taskGroup, true, []uint16{})
assert.Equal(t, 1, len(taskGroup.Networks))
}
}
func TestConfigureNetworkDoesNotCreateNewNetworkWhenNetworkExists(t *testing.T) {
taskGroup := createTestTaskGroup()
task := createTestTask()
taskGroup.Tasks = []*nomadApi.Task{task}
networkResource := &nomadApi.NetworkResource{Mode: "bridge"}
taskGroup.Networks = []*nomadApi.NetworkResource{networkResource}
if assert.Equal(t, 1, len(taskGroup.Networks)) {
configureNetwork(taskGroup, true, []uint16{})
assert.Equal(t, 1, len(taskGroup.Networks))
assert.Equal(t, networkResource, taskGroup.Networks[0])
}
}
func TestConfigureNetworkSetsCorrectValues(t *testing.T) {
taskGroup := createTestTaskGroup()
task := createTestTask()
_, ok := task.Config["network_mode"]
require.False(t, ok, "Test tasks network_mode should not be set")
taskGroup.Tasks = []*nomadApi.Task{task}
exposedPortsTests := [][]uint16{{}, {1337}, {42, 1337}}
t.Run("with no network access", func(t *testing.T) {
for _, ports := range exposedPortsTests {
testTaskGroup := *taskGroup
testTask := *task
testTaskGroup.Tasks = []*nomadApi.Task{&testTask}
configureNetwork(&testTaskGroup, false, ports)
mode, ok := testTask.Config["network_mode"]
assert.True(t, ok)
assert.Equal(t, "none", mode)
assert.Equal(t, 0, len(testTaskGroup.Networks))
}
})
t.Run("with network access", func(t *testing.T) {
for _, ports := range exposedPortsTests {
testTaskGroup := *taskGroup
testTask := *task
testTaskGroup.Tasks = []*nomadApi.Task{&testTask}
configureNetwork(&testTaskGroup, true, ports)
require.Equal(t, 1, len(testTaskGroup.Networks))
networkResource := testTaskGroup.Networks[0]
assert.Equal(t, "bridge", networkResource.Mode)
require.Equal(t, len(ports), len(networkResource.DynamicPorts))
assertExpectedPorts(t, ports, networkResource)
mode, ok := testTask.Config["network_mode"]
assert.True(t, ok)
assert.Equal(t, mode, "")
}
})
}
func assertExpectedPorts(t *testing.T, expectedPorts []uint16, networkResource *nomadApi.NetworkResource) {
t.Helper()
for _, expectedPort := range expectedPorts {
found := false
for _, actualPort := range networkResource.DynamicPorts {
if actualPort.To == int(expectedPort) {
found = true
break
}
}
assert.True(t, found, fmt.Sprintf("port list should contain %v", expectedPort))
}
}
func TestConfigureTaskWhenNoTaskExists(t *testing.T) {
taskGroup := createTestTaskGroup()
require.Equal(t, 0, len(taskGroup.Tasks))
expectedResources := createTestResources()
expectedTaskGroup := *taskGroup
expectedTask := nomadApi.NewTask("task", TaskDriver)
expectedTask.Resources = expectedResources
expectedImage := "python:latest"
expectedCommand := "sleep"
expectedArgs := []string{"infinity"}
expectedTask.Config = map[string]interface{}{
"image": expectedImage, "command": expectedCommand, "args": expectedArgs, "network_mode": "none"}
expectedTaskGroup.Tasks = []*nomadApi.Task{expectedTask}
expectedTaskGroup.Networks = []*nomadApi.NetworkResource{}
configureTask(taskGroup, expectedTask.Name,
uint(*expectedResources.CPU), uint(*expectedResources.MemoryMB),
expectedImage, false, []uint16{})
assert.Equal(t, expectedTaskGroup, *taskGroup)
}
func TestConfigureTaskWhenTaskExists(t *testing.T) {
taskGroup := createTestTaskGroup()
task := createTestTask()
task.Config = map[string]interface{}{"my_custom_config": "should not be overwritten"}
taskGroup.Tasks = []*nomadApi.Task{task}
require.Equal(t, 1, len(taskGroup.Tasks))
expectedResources := createTestResources()
expectedTaskGroup := *taskGroup
expectedTask := *task
expectedTask.Resources = expectedResources
expectedImage := "python:latest"
expectedTask.Config["image"] = expectedImage
expectedTask.Config["network_mode"] = "none"
expectedTaskGroup.Tasks = []*nomadApi.Task{&expectedTask}
expectedTaskGroup.Networks = []*nomadApi.NetworkResource{}
configureTask(taskGroup, expectedTask.Name,
uint(*expectedResources.CPU), uint(*expectedResources.MemoryMB),
expectedImage, false, []uint16{})
assert.Equal(t, expectedTaskGroup, *taskGroup)
assert.Equal(t, task, taskGroup.Tasks[0], "it should not create a new task")
}
func TestCreateTemplateJobSetsAllGivenArguments(t *testing.T) {
base, testJob := helpers.CreateTemplateJob()
prewarmingPoolSize, err := strconv.Atoi(testJob.TaskGroups[1].Meta[ConfigMetaPoolSizeKey])
require.NoError(t, err)
job := CreateTemplateJob(
base,
tests.DefaultJobID,
uint(prewarmingPoolSize),
uint(*testJob.TaskGroups[0].Tasks[0].Resources.CPU),
uint(*testJob.TaskGroups[0].Tasks[0].Resources.MemoryMB),
testJob.TaskGroups[0].Tasks[0].Config["image"].(string),
false,
nil,
)
assert.Equal(t, *testJob, *job)
}
func TestRegisterTemplateJobFailsWhenNomadJobRegistrationFails(t *testing.T) {
apiMock := apiQuerierMock{}
expectedErr := tests.ErrDefault
apiMock.On("RegisterNomadJob", mock.AnythingOfType("*api.Job")).Return("", expectedErr)
apiClient := &APIClient{&apiMock}
_, err := apiClient.RegisterTemplateJob(&nomadApi.Job{}, tests.DefaultJobID,
1, 2, 3, "image", false, []uint16{})
assert.ErrorIs(t, err, expectedErr)
apiMock.AssertNotCalled(t, "EvaluationStream")
}
func TestRegisterTemplateJobSucceedsWhenMonitoringEvaluationSucceeds(t *testing.T) {
apiMock := apiQuerierMock{}
evaluationID := "id"
stream := make(chan *nomadApi.Events)
readonlyStream := func() <-chan *nomadApi.Events {
return stream
}()
// Immediately close the stream so that reading from it cannot block endlessly.
close(stream)
apiMock.On("RegisterNomadJob", mock.AnythingOfType("*api.Job")).Return(evaluationID, nil)
apiMock.On("EvaluationStream", evaluationID, mock.AnythingOfType("*context.emptyCtx")).
Return(readonlyStream, nil)
apiClient := &APIClient{&apiMock}
_, err := apiClient.RegisterTemplateJob(&nomadApi.Job{}, tests.DefaultJobID,
1, 2, 3, "image", false, []uint16{})
assert.NoError(t, err)
}
func TestRegisterTemplateJobReturnsErrorWhenMonitoringEvaluationFails(t *testing.T) {
apiMock := apiQuerierMock{}
evaluationID := "id"
apiMock.On("RegisterNomadJob", mock.AnythingOfType("*api.Job")).Return(evaluationID, nil)
apiMock.On("EvaluationStream", evaluationID, mock.AnythingOfType("*context.emptyCtx")).Return(nil, tests.ErrDefault)
apiClient := &APIClient{&apiMock}
_, err := apiClient.RegisterTemplateJob(&nomadApi.Job{}, tests.DefaultJobID,
1, 2, 3, "image", false, []uint16{})
assert.ErrorIs(t, err, tests.ErrDefault)
}

392
internal/nomad/nomad.go Normal file
View File

@ -0,0 +1,392 @@
package nomad
import (
"context"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/logging"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/nullreader"
"io"
"net/url"
"strconv"
"time"
)
var (
log = logging.GetLogger("nomad")
ErrorExecutorCommunicationFailed = errors.New("communication with executor failed")
ErrorEvaluation = errors.New("evaluation could not complete")
ErrorPlacingAllocations = errors.New("failed to place all allocations")
ErrorLoadingJob = errors.New("failed to load job")
ErrorNoAllocatedResourcesFound = errors.New("no allocated resources found")
)
type AllocationProcessor func(*nomadApi.Allocation)
// ExecutorAPI provides access to a container orchestration solution.
type ExecutorAPI interface {
apiQuerier
// LoadEnvironmentJobs loads all environment jobs.
LoadEnvironmentJobs() ([]*nomadApi.Job, error)
// LoadRunnerJobs loads all runner jobs specific to the environment.
LoadRunnerJobs(environmentID string) ([]*nomadApi.Job, error)
// LoadRunnerIDs returns the IDs of all runners of the specified environment that are running and not about to
// be stopped.
LoadRunnerIDs(environmentID string) (runnerIds []string, err error)
// LoadRunnerPortMappings returns the mapped ports of the runner.
LoadRunnerPortMappings(runnerID string) ([]nomadApi.PortMapping, error)
// RegisterTemplateJob creates a template job based on the default job configuration and the given parameters.
// It registers the job and waits until the registration completes.
RegisterTemplateJob(defaultJob *nomadApi.Job, id string,
prewarmingPoolSize, cpuLimit, memoryLimit uint,
image string, networkAccess bool, exposedPorts []uint16) (*nomadApi.Job, error)
// RegisterRunnerJob creates a runner job based on the template job.
// It registers the job and waits until the registration completes.
RegisterRunnerJob(template *nomadApi.Job) error
// MonitorEvaluation monitors the given evaluation ID.
// It waits until the evaluation reaches one of the states complete, canceled or failed.
// If the evaluation was not successful, an error containing the failures is returned.
// See also https://github.com/hashicorp/nomad/blob/7d5a9ecde95c18da94c9b6ace2565afbfdd6a40d/command/monitor.go#L175
MonitorEvaluation(evaluationID string, ctx context.Context) error
// WatchAllocations listens on the Nomad event stream for allocation events.
// Depending on the incoming event, the corresponding one of the given functions is executed.
WatchAllocations(ctx context.Context, onNewAllocation, onDeletedAllocation AllocationProcessor) error
// ExecuteCommand executes the given command in the allocation with the given id.
// It writes the output of the command to stdout/stderr and reads input from stdin.
// If tty is true, the command will run with a tty.
ExecuteCommand(allocationID string, ctx context.Context, command []string, tty bool,
stdin io.Reader, stdout, stderr io.Writer) (int, error)
// MarkRunnerAsUsed marks the runner with the given ID as used. It also stores the timeout duration in the metadata.
MarkRunnerAsUsed(runnerID string, duration int) error
}
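// exampleExecuteEcho is a minimal sketch (name and command are chosen for illustration) of running a
// command through the ExecutorAPI above: with tty set to true and the InteractiveStderr option
// enabled, stdout and stderr of "echo hello" arrive on the two writers separately.
func exampleExecuteEcho(api ExecutorAPI, allocationID string, stdout, stderr io.Writer) (int, error) {
	return api.ExecuteCommand(allocationID, context.Background(),
		[]string{"sh", "-c", "echo hello"}, true, nullreader.NullReader{}, stdout, stderr)
}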
// APIClient implements the ExecutorAPI interface and can be used to perform different operations on the real
// Executor API and its return values.
type APIClient struct {
apiQuerier
}
// NewExecutorAPI creates a new api client.
// One client is usually sufficient for the complete runtime of the API.
func NewExecutorAPI(nomadURL *url.URL, nomadNamespace string) (ExecutorAPI, error) {
client := &APIClient{apiQuerier: &nomadAPIClient{}}
err := client.init(nomadURL, nomadNamespace)
return client, err
}
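// exampleNewExecutorAPI is a minimal sketch (the function name, URL and namespace are placeholder
// values) of how a caller constructs the client for a locally running Nomad agent.
func exampleNewExecutorAPI() (ExecutorAPI, error) {
	nomadURL := &url.URL{Scheme: "http", Host: "127.0.0.1:4646"}
	return NewExecutorAPI(nomadURL, "default")
}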
// init prepares an apiClient to be able to communicate to a provided Nomad API.
func (a *APIClient) init(nomadURL *url.URL, nomadNamespace string) error {
if err := a.apiQuerier.init(nomadURL, nomadNamespace); err != nil {
return fmt.Errorf("error initializing API querier: %w", err)
}
return nil
}
func (a *APIClient) LoadRunnerIDs(environmentID string) (runnerIDs []string, err error) {
list, err := a.listJobs(environmentID)
if err != nil {
return nil, err
}
for _, jobListStub := range list {
allocationRunning := jobListStub.JobSummary.Summary[TaskGroupName].Running > 0
if jobListStub.Status == structs.JobStatusRunning && allocationRunning {
runnerIDs = append(runnerIDs, jobListStub.ID)
}
}
return runnerIDs, nil
}
func (a *APIClient) LoadRunnerPortMappings(runnerID string) ([]nomadApi.PortMapping, error) {
alloc, err := a.apiQuerier.allocation(runnerID)
if err != nil {
return nil, fmt.Errorf("error querying allocation for runner %s: %w", runnerID, err)
}
if alloc.AllocatedResources == nil {
return nil, ErrorNoAllocatedResourcesFound
}
return alloc.AllocatedResources.Shared.Ports, nil
}
func (a *APIClient) LoadRunnerJobs(environmentID string) ([]*nomadApi.Job, error) {
runnerIDs, err := a.LoadRunnerIDs(environmentID)
if err != nil {
return []*nomadApi.Job{}, fmt.Errorf("couldn't load jobs: %w", err)
}
var occurredError error
jobs := make([]*nomadApi.Job, 0, len(runnerIDs))
for _, id := range runnerIDs {
job, err := a.apiQuerier.job(id)
if err != nil {
if occurredError == nil {
occurredError = ErrorLoadingJob
}
occurredError = fmt.Errorf("%w: couldn't load job info for runner %s - %v", occurredError, id, err)
continue
}
jobs = append(jobs, job)
}
return jobs, occurredError
}
func (a *APIClient) MonitorEvaluation(evaluationID string, ctx context.Context) error {
stream, err := a.apiQuerier.EvaluationStream(evaluationID, ctx)
if err != nil {
return fmt.Errorf("failed retrieving evaluation stream: %w", err)
}
// If ctx is canceled, the stream will be closed by Nomad and we exit the for loop.
return receiveAndHandleNomadAPIEvents(stream, handleEvaluationEvent)
}
func (a *APIClient) WatchAllocations(ctx context.Context,
onNewAllocation, onDeletedAllocation AllocationProcessor) error {
startTime := time.Now().UnixNano()
stream, err := a.AllocationStream(ctx)
if err != nil {
return fmt.Errorf("failed retrieving allocation stream: %w", err)
}
pendingAllocations := make(map[string]bool)
handler := func(event *nomadApi.Event) (bool, error) {
return false, handleAllocationEvent(startTime, pendingAllocations, event, onNewAllocation, onDeletedAllocation)
}
err = receiveAndHandleNomadAPIEvents(stream, handler)
return err
}
// nomadAPIEventHandler is a function that receives a nomadApi.Event and processes it.
// It is called by an event listening loop once for each received event.
// If done is true, the calling function knows that it should break out of the event listening
// loop.
type nomadAPIEventHandler func(event *nomadApi.Event) (done bool, err error)
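// exampleLoggingHandler is a minimal sketch of a nomadAPIEventHandler (the name is made up for
// illustration): it logs the topic of every event and never terminates the listening loop.
var exampleLoggingHandler nomadAPIEventHandler = func(event *nomadApi.Event) (bool, error) {
	log.WithField("topic", event.Topic).Debug("Received Nomad event")
	return false, nil
}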
// receiveAndHandleNomadAPIEvents receives events from the Nomad event stream and calls the handler function for
// each received event. It skips heartbeat events and returns an error if the received events contain an error.
func receiveAndHandleNomadAPIEvents(stream <-chan *nomadApi.Events, handler nomadAPIEventHandler) error {
// If original context is canceled, the stream will be closed by Nomad and we exit the for loop.
for events := range stream {
if events.IsHeartbeat() {
continue
}
if err := events.Err; err != nil {
return fmt.Errorf("error receiving events: %w", err)
}
for _, event := range events.Events {
// Don't take the address of the loop variable as the underlying value might change
eventCopy := event
done, err := handler(&eventCopy)
if err != nil || done {
return err
}
}
}
return nil
}
// handleEvaluationEvent is a nomadAPIEventHandler that returns whether the evaluation described by the event
// was successful.
func handleEvaluationEvent(event *nomadApi.Event) (bool, error) {
eval, err := event.Evaluation()
if err != nil {
return true, fmt.Errorf("failed to monitor evaluation: %w", err)
}
switch eval.Status {
case structs.EvalStatusComplete, structs.EvalStatusCancelled, structs.EvalStatusFailed:
return true, checkEvaluation(eval)
}
return false, nil
}
// handleAllocationEvent is a nomadAPIEventHandler that processes allocation events.
// If a new allocation is received, onNewAllocation is called. If an allocation is deleted, onDeletedAllocation
// is called. The pendingAllocations map is used to store allocations that are pending but not started yet. Using the
// map, the state is persisted across multiple calls of this function.
func handleAllocationEvent(startTime int64, pendingAllocations map[string]bool, event *nomadApi.Event,
onNewAllocation, onDeletedAllocation AllocationProcessor) error {
if event.Type != structs.TypeAllocationUpdated {
return nil
}
alloc, err := event.Allocation()
if err != nil {
return fmt.Errorf("failed to retrieve allocation from event: %w", err)
} else if alloc == nil {
return nil
}
// When starting the API and listening on the Nomad event stream, we might receive events that have
// already happened because Nomad seems to buffer them for a certain duration.
// Ignore old events here.
if alloc.ModifyTime < startTime {
return nil
}
if alloc.ClientStatus == structs.AllocClientStatusRunning {
switch alloc.DesiredStatus {
case structs.AllocDesiredStatusStop:
onDeletedAllocation(alloc)
case structs.AllocDesiredStatusRun:
// Is this the first event that marks the transition from pending to running?
_, ok := pendingAllocations[alloc.ID]
if ok {
onNewAllocation(alloc)
delete(pendingAllocations, alloc.ID)
}
}
}
if alloc.ClientStatus == structs.AllocClientStatusPending && alloc.DesiredStatus == structs.AllocDesiredStatusRun {
// The allocation is started; wait until it is running and add it to our list afterwards.
pendingAllocations[alloc.ID] = true
}
return nil
}
// checkEvaluation checks whether the given evaluation failed.
// If the evaluation failed, it returns an error with a message containing the failure information.
func checkEvaluation(eval *nomadApi.Evaluation) (err error) {
if len(eval.FailedTGAllocs) == 0 {
if eval.Status != structs.EvalStatusComplete {
err = fmt.Errorf("%w: %q", ErrorEvaluation, eval.Status)
}
} else {
err = fmt.Errorf("evaluation %q finished with status %q but %w", eval.ID, eval.Status, ErrorPlacingAllocations)
for taskGroup, metrics := range eval.FailedTGAllocs {
err = fmt.Errorf("%w\n%s: %#v", err, taskGroup, metrics)
}
if eval.BlockedEval != "" {
err = fmt.Errorf("%w\nEvaluation %q waiting for additional capacity to place remainder", err, eval.BlockedEval)
}
}
return err
}
func (a *APIClient) MarkRunnerAsUsed(runnerID string, duration int) error {
job, err := a.job(runnerID)
if err != nil {
return fmt.Errorf("couldn't retrieve job info: %w", err)
}
err = SetMetaConfigValue(job, ConfigMetaUsedKey, ConfigMetaUsedValue)
if err != nil {
return fmt.Errorf("couldn't update runner in job as used: %w", err)
}
err = SetMetaConfigValue(job, ConfigMetaTimeoutKey, strconv.Itoa(duration))
if err != nil {
return fmt.Errorf("couldn't update runner in job with timeout: %w", err)
}
_, err = a.RegisterNomadJob(job)
if err != nil {
return fmt.Errorf("couldn't update runner config: %w", err)
}
return nil
}
func (a *APIClient) LoadEnvironmentJobs() ([]*nomadApi.Job, error) {
jobStubs, err := a.listJobs(TemplateJobPrefix)
if err != nil {
return []*nomadApi.Job{}, fmt.Errorf("couldn't load jobs: %w", err)
}
jobs := make([]*nomadApi.Job, 0, len(jobStubs))
for _, jobStub := range jobStubs {
job, err := a.apiQuerier.job(jobStub.ID)
if err != nil {
return []*nomadApi.Job{}, fmt.Errorf("couldn't load job info for job %v: %w", jobStub.ID, err)
}
jobs = append(jobs, job)
}
return jobs, nil
}
// ExecuteCommand executes the given command in the given allocation.
// If tty is true, Nomad would normally write stdout and stderr of the command
// both to the stdout stream. However, if the InteractiveStderr server config option is true,
// we make sure that stdout and stderr are split correctly.
// In order for the stderr splitting to work, the command must have the structure
// []string{..., "sh", "-c", "my-command"}.
func (a *APIClient) ExecuteCommand(allocationID string,
ctx context.Context, command []string, tty bool,
stdin io.Reader, stdout, stderr io.Writer) (int, error) {
if tty && config.Config.Server.InteractiveStderr {
return a.executeCommandInteractivelyWithStderr(allocationID, ctx, command, stdin, stdout, stderr)
}
exitCode, err := a.apiQuerier.Execute(allocationID, ctx, command, tty, stdin, stdout, stderr)
if err != nil {
return 1, fmt.Errorf("error executing command in API: %w", err)
}
return exitCode, nil
}
// executeCommandInteractivelyWithStderr executes the given command interactively and splits stdout
// and stderr correctly. Normally, using Nomad to execute a command with tty=true (in order to have
// an interactive connection and possibly a fully working shell), would result in stdout and stderr
// to be served both over stdout. This function circumvents this by creating a fifo for the stderr
// of the command and starting a second execution that reads the stderr from that fifo.
func (a *APIClient) executeCommandInteractivelyWithStderr(allocationID string, ctx context.Context,
command []string, stdin io.Reader, stdout, stderr io.Writer) (int, error) {
// Use current nano time to make the stderr fifo kind of unique.
currentNanoTime := time.Now().UnixNano()
// We expect the command to be like []string{..., "sh", "-c", "my-command"}.
oldCommand := command[len(command)-1]
// Take the last command which is the one to be executed and wrap it to redirect stderr.
command[len(command)-1] = wrapCommandForStderrFifo(currentNanoTime, oldCommand)
stderrExitChan := make(chan int)
go func() {
// Catch stderr in separate execution.
exit, err := a.Execute(allocationID, ctx, stderrFifoCommand(currentNanoTime), true,
nullreader.NullReader{}, stderr, io.Discard)
if err != nil {
log.WithError(err).WithField("runner", allocationID).Warn("Stderr task finished with error")
}
stderrExitChan <- exit
}()
exit, err := a.Execute(allocationID, ctx, command, true, stdin, stdout, io.Discard)
// Wait until the stderr catch command finished to make sure we receive all output.
<-stderrExitChan
return exit, err
}
const (
// stderrFifoFormat represents the format we use for our stderr fifos. The %d should be unique for the execution
// as otherwise multiple executions are not possible.
// Example: "/tmp/stderr_1623330777825234133.fifo".
stderrFifoFormat = "/tmp/stderr_%d.fifo"
// stderrFifoCommandFormat, if executed, is supposed to create a fifo, read from it and remove it in the end.
// Example: "mkfifo my.fifo && (cat my.fifo; rm my.fifo)".
stderrFifoCommandFormat = "mkfifo %s && (cat %s; rm %s)"
// stderrWrapperCommandFormat, if executed, is supposed to wait until a fifo exists (it sleeps 10 ms to reduce the load
// caused by busy waiting on the system). Once the fifo exists, the given command is executed and its stderr
// redirected to the fifo.
// Example: "until [ -e my.fifo ]; do sleep 0.01; done; (echo \"my.fifo exists\") 2> my.fifo".
stderrWrapperCommandFormat = "until [ -e %s ]; do sleep 0.01; done; (%s) 2> %s"
)
func stderrFifoCommand(id int64) []string {
stderrFifoPath := stderrFifo(id)
return []string{"sh", "-c", fmt.Sprintf(stderrFifoCommandFormat, stderrFifoPath, stderrFifoPath, stderrFifoPath)}
}
func wrapCommandForStderrFifo(id int64, command string) string {
stderrFifoPath := stderrFifo(id)
return fmt.Sprintf(stderrWrapperCommandFormat, stderrFifoPath, command, stderrFifoPath)
}
func stderrFifo(id int64) string {
return fmt.Sprintf(stderrFifoFormat, id)
}
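// exampleStderrFifoCommands is a minimal sketch (the function name and id value are chosen for
// illustration) that shows the two commands generated for one execution: the fifo reader started in
// the background goroutine above, and the wrapped user command whose stderr is redirected into that fifo.
func exampleStderrFifoCommands() {
	const id int64 = 1623330777825234133
	// ["sh" "-c" "mkfifo /tmp/stderr_1623330777825234133.fifo && (cat /tmp/stderr_1623330777825234133.fifo; rm /tmp/stderr_1623330777825234133.fifo)"]
	fmt.Println(stderrFifoCommand(id))
	// "until [ -e /tmp/stderr_1623330777825234133.fifo ]; do sleep 0.01; done; (echo hello) 2> /tmp/stderr_1623330777825234133.fifo"
	fmt.Println(wrapCommandForStderrFifo(id, "echo hello"))
}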

View File

@ -0,0 +1,789 @@
package nomad
import (
"bytes"
"context"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/mitchellh/mapstructure"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/config"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/nullreader"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"io"
"net/url"
"regexp"
"strings"
"testing"
"time"
)
func TestLoadRunnersTestSuite(t *testing.T) {
suite.Run(t, new(LoadRunnersTestSuite))
}
type LoadRunnersTestSuite struct {
suite.Suite
jobID string
mock *apiQuerierMock
nomadAPIClient APIClient
availableRunner *nomadApi.JobListStub
anotherAvailableRunner *nomadApi.JobListStub
pendingRunner *nomadApi.JobListStub
deadRunner *nomadApi.JobListStub
}
func (s *LoadRunnersTestSuite) SetupTest() {
s.jobID = tests.DefaultJobID
s.mock = &apiQuerierMock{}
s.nomadAPIClient = APIClient{apiQuerier: s.mock}
s.availableRunner = newJobListStub(tests.DefaultJobID, structs.JobStatusRunning, 1)
s.anotherAvailableRunner = newJobListStub(tests.AnotherJobID, structs.JobStatusRunning, 1)
s.pendingRunner = newJobListStub(tests.DefaultJobID+"-1", structs.JobStatusPending, 0)
s.deadRunner = newJobListStub(tests.AnotherJobID+"-1", structs.JobStatusDead, 0)
}
func newJobListStub(id, status string, amountRunning int) *nomadApi.JobListStub {
return &nomadApi.JobListStub{
ID: id,
Status: status,
JobSummary: &nomadApi.JobSummary{
JobID: id,
Summary: map[string]nomadApi.TaskGroupSummary{TaskGroupName: {Running: amountRunning}},
},
}
}
func (s *LoadRunnersTestSuite) TestErrorOfUnderlyingApiCallIsPropagated() {
s.mock.On("listJobs", mock.AnythingOfType("string")).
Return(nil, tests.ErrDefault)
returnedIds, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)
s.Nil(returnedIds)
s.Equal(tests.ErrDefault, err)
}
func (s *LoadRunnersTestSuite) TestReturnsNoErrorWhenUnderlyingApiCallDoesNot() {
s.mock.On("listJobs", mock.AnythingOfType("string")).
Return([]*nomadApi.JobListStub{}, nil)
_, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)
s.NoError(err)
}
func (s *LoadRunnersTestSuite) TestAvailableRunnerIsReturned() {
s.mock.On("listJobs", mock.AnythingOfType("string")).
Return([]*nomadApi.JobListStub{s.availableRunner}, nil)
returnedIds, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)
s.Require().NoError(err)
s.Len(returnedIds, 1)
s.Equal(s.availableRunner.ID, returnedIds[0])
}
func (s *LoadRunnersTestSuite) TestPendingRunnerIsNotReturned() {
s.mock.On("listJobs", mock.AnythingOfType("string")).
Return([]*nomadApi.JobListStub{s.pendingRunner}, nil)
returnedIds, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)
s.Require().NoError(err)
s.Empty(returnedIds)
}
func (s *LoadRunnersTestSuite) TestDeadRunnerIsNotReturned() {
s.mock.On("listJobs", mock.AnythingOfType("string")).
Return([]*nomadApi.JobListStub{s.deadRunner}, nil)
returnedIds, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)
s.Require().NoError(err)
s.Empty(returnedIds)
}
func (s *LoadRunnersTestSuite) TestReturnsAllAvailableRunners() {
runnersList := []*nomadApi.JobListStub{
s.availableRunner,
s.anotherAvailableRunner,
s.pendingRunner,
s.deadRunner,
}
s.mock.On("listJobs", mock.AnythingOfType("string")).
Return(runnersList, nil)
returnedIds, err := s.nomadAPIClient.LoadRunnerIDs(s.jobID)
s.Require().NoError(err)
s.Len(returnedIds, 2)
s.Contains(returnedIds, s.availableRunner.ID)
s.Contains(returnedIds, s.anotherAvailableRunner.ID)
}
var (
TestURL = url.URL{
Scheme: "http",
Host: "127.0.0.1:4646",
}
)
const TestNamespace = "unit-tests"
func TestApiClient_init(t *testing.T) {
client := &APIClient{apiQuerier: &nomadAPIClient{}}
err := client.init(&TestURL, TestNamespace)
require.Nil(t, err)
}
func TestApiClientCanNotBeInitializedWithInvalidUrl(t *testing.T) {
client := &APIClient{apiQuerier: &nomadAPIClient{}}
err := client.init(&url.URL{
Scheme: "http",
Host: "http://127.0.0.1:4646",
}, TestNamespace)
assert.NotNil(t, err)
}
func TestNewExecutorApiCanBeCreatedWithoutError(t *testing.T) {
expectedClient := &APIClient{apiQuerier: &nomadAPIClient{}}
err := expectedClient.init(&TestURL, TestNamespace)
require.Nil(t, err)
_, err = NewExecutorAPI(&TestURL, TestNamespace)
require.Nil(t, err)
}
// asynchronouslyMonitorEvaluation creates an APIClient with a mocked Nomad API and
// runs the MonitorEvaluation method in a goroutine. The mock returns a read-only
// version of the given stream to simulate an event stream received from the real
// Nomad API.
func asynchronouslyMonitorEvaluation(stream chan *nomadApi.Events) chan error {
ctx := context.Background()
// We can only get a read-only channel once we return it from a function.
readOnlyStream := func() <-chan *nomadApi.Events { return stream }()
apiMock := &apiQuerierMock{}
apiMock.On("EvaluationStream", mock.AnythingOfType("string"), ctx).Return(readOnlyStream, nil)
apiClient := &APIClient{apiMock}
errChan := make(chan error)
go func() {
errChan <- apiClient.MonitorEvaluation("id", ctx)
}()
return errChan
}
func TestApiClient_MonitorEvaluationReturnsNilWhenStreamIsClosed(t *testing.T) {
stream := make(chan *nomadApi.Events)
errChan := asynchronouslyMonitorEvaluation(stream)
close(stream)
var err error
// If close doesn't terminate MonitorEvaluation, this test won't complete without a timeout.
select {
case err = <-errChan:
case <-time.After(time.Millisecond * 10):
t.Fatal("MonitorEvaluation didn't finish as expected")
}
assert.Nil(t, err)
}
func TestApiClient_MonitorEvaluationReturnsErrorWhenStreamReturnsError(t *testing.T) {
apiMock := &apiQuerierMock{}
apiMock.On("EvaluationStream", mock.AnythingOfType("string"), mock.AnythingOfType("*context.emptyCtx")).
Return(nil, tests.ErrDefault)
apiClient := &APIClient{apiMock}
err := apiClient.MonitorEvaluation("id", context.Background())
assert.ErrorIs(t, err, tests.ErrDefault)
}
type eventPayload struct {
Evaluation *nomadApi.Evaluation
Allocation *nomadApi.Allocation
}
// eventForEvaluation takes an evaluation and creates an Event with the given evaluation
// as its payload. Nomad uses the mapstructure library to decode the payload, which we
// simply reverse here.
func eventForEvaluation(t *testing.T, eval *nomadApi.Evaluation) nomadApi.Event {
t.Helper()
payload := make(map[string]interface{})
err := mapstructure.Decode(eventPayload{Evaluation: eval}, &payload)
if err != nil {
t.Fatalf("Couldn't decode evaluation %v into payload map", eval)
return nomadApi.Event{}
}
event := nomadApi.Event{Topic: nomadApi.TopicEvaluation, Payload: payload}
return event
}
// simulateNomadEventStream streams the given events sequentially to the stream channel.
// It returns how many events have been processed until an error occurred.
func simulateNomadEventStream(
stream chan *nomadApi.Events,
errChan chan error,
events []*nomadApi.Events,
) (int, error) {
eventsProcessed := 0
var e *nomadApi.Events
for _, e = range events {
select {
case err := <-errChan:
return eventsProcessed, err
case stream <- e:
eventsProcessed++
}
}
// Wait for the last event to be processed.
var err error
select {
case <-time.After(10 * time.Millisecond):
case err = <-errChan:
}
return eventsProcessed, err
}
// runEvaluationMonitoring simulates events streamed from the Nomad event stream
// to the MonitorEvaluation method. It starts the MonitorEvaluation function as a goroutine
// and sequentially transfers the events from the given array to a channel simulating the stream.
func runEvaluationMonitoring(events []*nomadApi.Events) (eventsProcessed int, err error) {
stream := make(chan *nomadApi.Events)
errChan := asynchronouslyMonitorEvaluation(stream)
return simulateNomadEventStream(stream, errChan, events)
}
func TestApiClient_MonitorEvaluationWithSuccessfulEvent(t *testing.T) {
eval := nomadApi.Evaluation{Status: structs.EvalStatusComplete}
pendingEval := nomadApi.Evaluation{Status: structs.EvalStatusPending}
// make sure that the tested function can complete
require.Nil(t, checkEvaluation(&eval))
events := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, &eval)}}
pendingEvaluationEvents := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, &pendingEval)}}
multipleEventsWithPending := nomadApi.Events{Events: []nomadApi.Event{
eventForEvaluation(t, &pendingEval), eventForEvaluation(t, &eval),
}}
var cases = []struct {
streamedEvents []*nomadApi.Events
expectedEventsProcessed int
name string
}{
{[]*nomadApi.Events{&events}, 1,
"it completes with successful event"},
{[]*nomadApi.Events{&events, &events}, 1,
"it completes at first successful event"},
{[]*nomadApi.Events{{}, &events}, 2,
"it skips heartbeat and completes"},
{[]*nomadApi.Events{&pendingEvaluationEvents, &events}, 2,
"it skips pending evaluation and completes"},
{[]*nomadApi.Events{&multipleEventsWithPending}, 1,
"it handles multiple events per received event"},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
eventsProcessed, err := runEvaluationMonitoring(c.streamedEvents)
assert.Nil(t, err)
assert.Equal(t, c.expectedEventsProcessed, eventsProcessed)
})
}
}
func TestApiClient_MonitorEvaluationWithFailingEvent(t *testing.T) {
eval := nomadApi.Evaluation{Status: structs.EvalStatusFailed}
evalErr := checkEvaluation(&eval)
require.NotNil(t, evalErr)
pendingEval := nomadApi.Evaluation{Status: structs.EvalStatusPending}
events := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, &eval)}}
pendingEvaluationEvents := nomadApi.Events{Events: []nomadApi.Event{eventForEvaluation(t, &pendingEval)}}
multipleEventsWithPending := nomadApi.Events{Events: []nomadApi.Event{
eventForEvaluation(t, &pendingEval), eventForEvaluation(t, &eval),
}}
eventsWithErr := nomadApi.Events{Err: tests.ErrDefault, Events: []nomadApi.Event{{}}}
var cases = []struct {
streamedEvents []*nomadApi.Events
expectedEventsProcessed int
expectedError error
name string
}{
{[]*nomadApi.Events{&events}, 1, evalErr,
"it fails with failing event"},
{[]*nomadApi.Events{&events, &events}, 1, evalErr,
"it fails at first failing event"},
{[]*nomadApi.Events{{}, &events}, 2, evalErr,
"it skips heartbeat and fail"},
{[]*nomadApi.Events{&pendingEvaluationEvents, &events}, 2, evalErr,
"it skips pending evaluation and fail"},
{[]*nomadApi.Events{&multipleEventsWithPending}, 1, evalErr,
"it handles multiple events per received event and fails"},
{[]*nomadApi.Events{&eventsWithErr}, 1, tests.ErrDefault,
"it fails with event error when event has error"},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
eventsProcessed, err := runEvaluationMonitoring(c.streamedEvents)
require.NotNil(t, err)
assert.Contains(t, err.Error(), c.expectedError.Error())
assert.Equal(t, c.expectedEventsProcessed, eventsProcessed)
})
}
}
func TestApiClient_MonitorEvaluationFailsWhenFailingToDecodeEvaluation(t *testing.T) {
event := nomadApi.Event{
Topic: nomadApi.TopicEvaluation,
// This should fail decoding, as Evaluation.Status is expected to be a string, not int
Payload: map[string]interface{}{"Evaluation": map[string]interface{}{"Status": 1}},
}
_, err := event.Evaluation()
require.NotNil(t, err)
eventsProcessed, err := runEvaluationMonitoring([]*nomadApi.Events{{Events: []nomadApi.Event{event}}})
assert.Error(t, err)
assert.Equal(t, 1, eventsProcessed)
}
func TestCheckEvaluationWithFailedAllocations(t *testing.T) {
testKey := "test1"
failedAllocs := map[string]*nomadApi.AllocationMetric{
testKey: {NodesExhausted: 1},
}
evaluation := nomadApi.Evaluation{FailedTGAllocs: failedAllocs, Status: structs.EvalStatusFailed}
assertMessageContainsCorrectStrings := func(msg string) {
assert.Contains(t, msg, evaluation.Status, "error should contain the evaluation status")
assert.Contains(t, msg, fmt.Sprintf("%s: %#v", testKey, failedAllocs[testKey]),
"error should contain the failed allocations metric")
}
var msgWithoutBlockedEval, msgWithBlockedEval string
t.Run("without blocked eval", func(t *testing.T) {
err := checkEvaluation(&evaluation)
require.NotNil(t, err)
msgWithoutBlockedEval = err.Error()
assertMessageContainsCorrectStrings(msgWithoutBlockedEval)
})
t.Run("with blocked eval", func(t *testing.T) {
evaluation.BlockedEval = "blocking-eval"
err := checkEvaluation(&evaluation)
require.NotNil(t, err)
msgWithBlockedEval = err.Error()
assertMessageContainsCorrectStrings(msgWithBlockedEval)
})
assert.NotEqual(t, msgWithBlockedEval, msgWithoutBlockedEval)
}
func TestCheckEvaluationWithoutFailedAllocations(t *testing.T) {
evaluation := nomadApi.Evaluation{FailedTGAllocs: make(map[string]*nomadApi.AllocationMetric)}
t.Run("when evaluation status complete", func(t *testing.T) {
evaluation.Status = structs.EvalStatusComplete
err := checkEvaluation(&evaluation)
assert.Nil(t, err)
})
t.Run("when evaluation status not complete", func(t *testing.T) {
incompleteStates := []string{structs.EvalStatusFailed, structs.EvalStatusCancelled,
structs.EvalStatusBlocked, structs.EvalStatusPending}
for _, status := range incompleteStates {
evaluation.Status = status
err := checkEvaluation(&evaluation)
require.NotNil(t, err)
assert.Contains(t, err.Error(), status, "error should contain the evaluation status")
}
})
}
func TestApiClient_WatchAllocationsIgnoresOldAllocations(t *testing.T) {
oldStoppedAllocation := createOldAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusStop)
oldPendingAllocation := createOldAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
oldRunningAllocation := createOldAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)
oldAllocationEvents := nomadApi.Events{Events: []nomadApi.Event{
eventForAllocation(t, oldStoppedAllocation),
eventForAllocation(t, oldPendingAllocation),
eventForAllocation(t, oldRunningAllocation),
}}
assertWatchAllocation(t, []*nomadApi.Events{&oldAllocationEvents},
[]*nomadApi.Allocation(nil), []*nomadApi.Allocation(nil))
}
func createOldAllocation(clientStatus, desiredStatus string) *nomadApi.Allocation {
return createAllocation(time.Now().Add(-time.Minute).UnixNano(), clientStatus, desiredStatus)
}
func TestApiClient_WatchAllocationsIgnoresUnhandledEvents(t *testing.T) {
nodeEvents := nomadApi.Events{Events: []nomadApi.Event{
{
Topic: nomadApi.TopicNode,
Type: structs.TypeNodeEvent,
},
}}
assertWatchAllocation(t, []*nomadApi.Events{&nodeEvents}, []*nomadApi.Allocation(nil), []*nomadApi.Allocation(nil))
planEvents := nomadApi.Events{Events: []nomadApi.Event{
{
Topic: nomadApi.TopicAllocation,
Type: structs.TypePlanResult,
},
}}
assertWatchAllocation(t, []*nomadApi.Events{&planEvents}, []*nomadApi.Allocation(nil), []*nomadApi.Allocation(nil))
}
func TestApiClient_WatchAllocationsHandlesEvents(t *testing.T) {
newPendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
pendingAllocationEvents := nomadApi.Events{Events: []nomadApi.Event{eventForAllocation(t, newPendingAllocation)}}
newStartedAllocation := createRecentAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusRun)
startAllocationEvents := nomadApi.Events{Events: []nomadApi.Event{
eventForAllocation(t, newPendingAllocation),
eventForAllocation(t, newStartedAllocation),
}}
newStoppedAllocation := createRecentAllocation(structs.AllocClientStatusRunning, structs.AllocDesiredStatusStop)
stopAllocationEvents := nomadApi.Events{Events: []nomadApi.Event{
eventForAllocation(t, newPendingAllocation),
eventForAllocation(t, newStartedAllocation),
eventForAllocation(t, newStoppedAllocation),
}}
var cases = []struct {
streamedEvents []*nomadApi.Events
expectedNewAllocations []*nomadApi.Allocation
expectedDeletedAllocations []*nomadApi.Allocation
name string
}{
{[]*nomadApi.Events{&pendingAllocationEvents},
[]*nomadApi.Allocation(nil), []*nomadApi.Allocation(nil),
"it does not add allocation when client status is pending"},
{[]*nomadApi.Events{&startAllocationEvents},
[]*nomadApi.Allocation{newStartedAllocation},
[]*nomadApi.Allocation(nil),
"it adds allocation with matching events"},
{[]*nomadApi.Events{{}, &startAllocationEvents},
[]*nomadApi.Allocation{newStartedAllocation},
[]*nomadApi.Allocation(nil),
"it skips heartbeat and adds allocation with matching events"},
{[]*nomadApi.Events{&stopAllocationEvents},
[]*nomadApi.Allocation{newStartedAllocation},
[]*nomadApi.Allocation{newStoppedAllocation},
"it adds and deletes the allocation"},
{[]*nomadApi.Events{&startAllocationEvents, &startAllocationEvents},
[]*nomadApi.Allocation{newStartedAllocation, newStartedAllocation},
[]*nomadApi.Allocation(nil),
"it handles multiple events"},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
assertWatchAllocation(t, c.streamedEvents,
c.expectedNewAllocations, c.expectedDeletedAllocations)
})
}
}
func TestHandleAllocationEventBuffersPendingAllocation(t *testing.T) {
newPendingAllocation := createRecentAllocation(structs.AllocClientStatusPending, structs.AllocDesiredStatusRun)
newPendingEvent := eventForAllocation(t, newPendingAllocation)
pendingMap := make(map[string]bool)
var noop AllocationProcessor = func(allocation *nomadApi.Allocation) {}
err := handleAllocationEvent(time.Now().UnixNano(), pendingMap, &newPendingEvent, noop, noop)
require.NoError(t, err)
assert.True(t, pendingMap[newPendingAllocation.ID])
}
func TestAPIClient_WatchAllocationsReturnsErrorWhenAllocationStreamCannotBeRetrieved(t *testing.T) {
apiMock := &apiQuerierMock{}
apiMock.On("AllocationStream", mock.Anything).Return(nil, tests.ErrDefault)
apiClient := &APIClient{apiMock}
noop := func(a *nomadApi.Allocation) {}
err := apiClient.WatchAllocations(context.Background(), noop, noop)
assert.ErrorIs(t, err, tests.ErrDefault)
}
func TestAPIClient_WatchAllocationsReturnsErrorWhenAllocationCannotBeRetrievedWithoutReceivingFurtherEvents(
t *testing.T) {
noop := func(a *nomadApi.Allocation) {}
event := nomadApi.Event{
Type: structs.TypeAllocationUpdated,
Topic: nomadApi.TopicAllocation,
// This should fail decoding, as Allocation.ID is expected to be a string, not int
Payload: map[string]interface{}{"Allocation": map[string]interface{}{"ID": 1}},
}
_, err := event.Allocation()
require.Error(t, err)
events := []*nomadApi.Events{{Events: []nomadApi.Event{event}}, {}}
eventsProcessed, err := runAllocationWatching(t, events, noop, noop)
assert.Error(t, err)
assert.Equal(t, 1, eventsProcessed)
}
func assertWatchAllocation(t *testing.T, events []*nomadApi.Events,
expectedNewAllocations, expectedDeletedAllocations []*nomadApi.Allocation) {
t.Helper()
var newAllocations []*nomadApi.Allocation
onNewAllocation := func(alloc *nomadApi.Allocation) {
newAllocations = append(newAllocations, alloc)
}
var deletedAllocations []*nomadApi.Allocation
onDeletedAllocation := func(alloc *nomadApi.Allocation) {
deletedAllocations = append(deletedAllocations, alloc)
}
eventsProcessed, err := runAllocationWatching(t, events, onNewAllocation, onDeletedAllocation)
assert.NoError(t, err)
assert.Equal(t, len(events), eventsProcessed)
assert.Equal(t, expectedNewAllocations, newAllocations)
assert.Equal(t, expectedDeletedAllocations, deletedAllocations)
}
// runAllocationWatching simulates events streamed from the Nomad event stream
// to the WatchAllocations method. It starts the WatchAllocations function as a goroutine
// and sequentially transfers the events from the given array to a channel simulating the stream.
func runAllocationWatching(t *testing.T, events []*nomadApi.Events,
onNewAllocation, onDeletedAllocation AllocationProcessor) (eventsProcessed int, err error) {
t.Helper()
stream := make(chan *nomadApi.Events)
errChan := asynchronouslyWatchAllocations(stream, onNewAllocation, onDeletedAllocation)
return simulateNomadEventStream(stream, errChan, events)
}
// asynchronouslyWatchAllocations creates an APIClient with a mocked Nomad API and
// runs the WatchAllocations method in a goroutine. The mock returns a read-only
// version of the given stream to simulate an event stream received from the real
// Nomad API.
func asynchronouslyWatchAllocations(stream chan *nomadApi.Events,
onNewAllocation, onDeletedAllocation AllocationProcessor) chan error {
ctx := context.Background()
// We can only get a read-only channel once we return it from a function.
readOnlyStream := func() <-chan *nomadApi.Events { return stream }()
apiMock := &apiQuerierMock{}
apiMock.On("AllocationStream", ctx).Return(readOnlyStream, nil)
apiClient := &APIClient{apiMock}
errChan := make(chan error)
go func() {
errChan <- apiClient.WatchAllocations(ctx, onNewAllocation, onDeletedAllocation)
}()
return errChan
}
// eventForAllocation takes an allocation and creates an Event with the given allocation
// as its payload. Nomad uses the mapstructure library to decode the payload, which we
// simply reverse here.
func eventForAllocation(t *testing.T, alloc *nomadApi.Allocation) nomadApi.Event {
t.Helper()
payload := make(map[string]interface{})
err := mapstructure.Decode(eventPayload{Allocation: alloc}, &payload)
if err != nil {
t.Fatalf("Couldn't decode allocation %v into payload map", err)
return nomadApi.Event{}
}
event := nomadApi.Event{
Topic: nomadApi.TopicAllocation,
Type: structs.TypeAllocationUpdated,
Payload: payload,
}
return event
}
func createAllocation(modifyTime int64, clientStatus, desiredStatus string) *nomadApi.Allocation {
return &nomadApi.Allocation{
ID: tests.DefaultRunnerID,
ModifyTime: modifyTime,
ClientStatus: clientStatus,
DesiredStatus: desiredStatus,
}
}
func createRecentAllocation(clientStatus, desiredStatus string) *nomadApi.Allocation {
return createAllocation(time.Now().Add(time.Minute).UnixNano(), clientStatus, desiredStatus)
}
func TestExecuteCommandTestSuite(t *testing.T) {
suite.Run(t, new(ExecuteCommandTestSuite))
}
type ExecuteCommandTestSuite struct {
suite.Suite
allocationID string
ctx context.Context
testCommand string
testCommandArray []string
expectedStdout string
expectedStderr string
apiMock *apiQuerierMock
nomadAPIClient APIClient
}
func (s *ExecuteCommandTestSuite) SetupTest() {
s.allocationID = "test-allocation-id"
s.ctx = context.Background()
s.testCommand = "echo 'do nothing'"
s.testCommandArray = []string{"sh", "-c", s.testCommand}
s.expectedStdout = "stdout"
s.expectedStderr = "stderr"
s.apiMock = &apiQuerierMock{}
s.nomadAPIClient = APIClient{apiQuerier: s.apiMock}
}
const withTTY = true
func (s *ExecuteCommandTestSuite) TestWithSeparateStderr() {
config.Config.Server.InteractiveStderr = true
commandExitCode := 42
stderrExitCode := 1
var stdout, stderr bytes.Buffer
var calledStdoutCommand, calledStderrCommand []string
// mock regular call
s.mockExecute(s.testCommandArray, commandExitCode, nil, func(args mock.Arguments) {
var ok bool
calledStdoutCommand, ok = args.Get(2).([]string)
s.Require().True(ok)
writer, ok := args.Get(5).(io.Writer)
s.Require().True(ok)
_, err := writer.Write([]byte(s.expectedStdout))
s.Require().NoError(err)
})
// mock stderr call
s.mockExecute(mock.AnythingOfType("[]string"), stderrExitCode, nil, func(args mock.Arguments) {
var ok bool
calledStderrCommand, ok = args.Get(2).([]string)
s.Require().True(ok)
writer, ok := args.Get(5).(io.Writer)
s.Require().True(ok)
_, err := writer.Write([]byte(s.expectedStderr))
s.Require().NoError(err)
})
exitCode, err := s.nomadAPIClient.
ExecuteCommand(s.allocationID, s.ctx, s.testCommandArray, withTTY, nullreader.NullReader{}, &stdout, &stderr)
s.Require().NoError(err)
s.apiMock.AssertNumberOfCalls(s.T(), "Execute", 2)
s.Equal(commandExitCode, exitCode)
s.Run("should wrap command in stderr wrapper", func() {
s.Require().NotNil(calledStdoutCommand)
stderrWrapperCommand := fmt.Sprintf(stderrWrapperCommandFormat, stderrFifoFormat, s.testCommand, stderrFifoFormat)
stdoutFifoRegexp := strings.ReplaceAll(regexp.QuoteMeta(stderrWrapperCommand), "%d", "\\d*")
s.Regexp(stdoutFifoRegexp, calledStdoutCommand[len(calledStdoutCommand)-1])
})
s.Run("should call correct stderr command", func() {
s.Require().NotNil(calledStderrCommand)
stderrFifoCommand := fmt.Sprintf(stderrFifoCommandFormat, stderrFifoFormat, stderrFifoFormat, stderrFifoFormat)
stderrFifoRegexp := strings.ReplaceAll(regexp.QuoteMeta(stderrFifoCommand), "%d", "\\d*")
s.Regexp(stderrFifoRegexp, calledStderrCommand[len(calledStderrCommand)-1])
})
s.Run("should return correct output", func() {
s.Equal(s.expectedStdout, stdout.String())
s.Equal(s.expectedStderr, stderr.String())
})
}
func (s *ExecuteCommandTestSuite) TestWithSeparateStderrReturnsCommandError() {
config.Config.Server.InteractiveStderr = true
s.mockExecute(s.testCommandArray, 1, tests.ErrDefault, func(args mock.Arguments) {})
s.mockExecute(mock.AnythingOfType("[]string"), 1, nil, func(args mock.Arguments) {})
_, err := s.nomadAPIClient.
ExecuteCommand(s.allocationID, s.ctx, s.testCommandArray, withTTY, nullreader.NullReader{}, io.Discard, io.Discard)
s.Equal(tests.ErrDefault, err)
}
func (s *ExecuteCommandTestSuite) TestWithoutSeparateStderr() {
config.Config.Server.InteractiveStderr = false
var stdout, stderr bytes.Buffer
commandExitCode := 42
// mock regular call
s.mockExecute(s.testCommandArray, commandExitCode, nil, func(args mock.Arguments) {
stdout, ok := args.Get(5).(io.Writer)
s.Require().True(ok)
_, err := stdout.Write([]byte(s.expectedStdout))
s.Require().NoError(err)
stderr, ok := args.Get(6).(io.Writer)
s.Require().True(ok)
_, err = stderr.Write([]byte(s.expectedStderr))
s.Require().NoError(err)
})
exitCode, err := s.nomadAPIClient.
ExecuteCommand(s.allocationID, s.ctx, s.testCommandArray, withTTY, nullreader.NullReader{}, &stdout, &stderr)
s.Require().NoError(err)
s.apiMock.AssertNumberOfCalls(s.T(), "Execute", 1)
s.Equal(commandExitCode, exitCode)
s.Equal(s.expectedStdout, stdout.String())
s.Equal(s.expectedStderr, stderr.String())
}
func (s *ExecuteCommandTestSuite) TestWithoutSeparateStderrReturnsCommandError() {
config.Config.Server.InteractiveStderr = false
s.mockExecute(s.testCommandArray, 1, tests.ErrDefault, func(args mock.Arguments) {})
_, err := s.nomadAPIClient.
ExecuteCommand(s.allocationID, s.ctx, s.testCommandArray, withTTY, nullreader.NullReader{}, io.Discard, io.Discard)
s.ErrorIs(err, tests.ErrDefault)
}
func (s *ExecuteCommandTestSuite) mockExecute(command interface{}, exitCode int,
err error, runFunc func(arguments mock.Arguments)) {
s.apiMock.On("Execute", s.allocationID, s.ctx, command, withTTY,
mock.Anything, mock.Anything, mock.Anything).
Run(runFunc).
Return(exitCode, err)
}
func TestAPIClient_LoadRunnerPortMappings(t *testing.T) {
apiMock := &apiQuerierMock{}
mockedCall := apiMock.On("allocation", tests.DefaultRunnerID)
nomadAPIClient := APIClient{apiQuerier: apiMock}
t.Run("should return error when API query fails", func(t *testing.T) {
mockedCall.Return(nil, tests.ErrDefault)
portMappings, err := nomadAPIClient.LoadRunnerPortMappings(tests.DefaultRunnerID)
assert.Nil(t, portMappings)
assert.ErrorIs(t, err, tests.ErrDefault)
})
t.Run("should return error when AllocatedResources is nil", func(t *testing.T) {
mockedCall.Return(&nomadApi.Allocation{AllocatedResources: nil}, nil)
portMappings, err := nomadAPIClient.LoadRunnerPortMappings(tests.DefaultRunnerID)
assert.ErrorIs(t, err, ErrorNoAllocatedResourcesFound)
assert.Nil(t, portMappings)
})
t.Run("should correctly return ports", func(t *testing.T) {
allocation := &nomadApi.Allocation{
AllocatedResources: &nomadApi.AllocatedResources{
Shared: nomadApi.AllocatedSharedResources{Ports: tests.DefaultPortMappings},
},
}
mockedCall.Return(allocation, nil)
portMappings, err := nomadAPIClient.LoadRunnerPortMappings(tests.DefaultRunnerID)
assert.NoError(t, err)
assert.Equal(t, tests.DefaultPortMappings, portMappings)
})
}

View File

@ -0,0 +1,9 @@
package runner
import "gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
const (
defaultEnvironmentID = EnvironmentID(tests.DefaultEnvironmentIDAsInteger)
anotherEnvironmentID = EnvironmentID(tests.AnotherEnvironmentIDAsInteger)
defaultInactivityTimeout = 0
)

View File

@ -0,0 +1,46 @@
package runner
import (
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"sync"
)
// ExecutionStorage stores executions.
type ExecutionStorage interface {
// Add adds an execution to the storage.
// It overwrites the existing execution if an execution with the same id already exists.
Add(id ExecutionID, executionRequest *dto.ExecutionRequest)
// Pop deletes the execution with the given id from the storage and returns it.
// If no such execution exists, ok is false; otherwise, it is true.
Pop(id ExecutionID) (request *dto.ExecutionRequest, ok bool)
}
// localExecutionStorage stores execution objects in the local application memory.
// ToDo: Create an implementation that uses some persistent storage like a database.
type localExecutionStorage struct {
sync.RWMutex
executions map[ExecutionID]*dto.ExecutionRequest
}
// NewLocalExecutionStorage returns an ExecutionStorage implementation.
// This implementation stores the data thread-safe in the local application memory.
func NewLocalExecutionStorage() *localExecutionStorage {
return &localExecutionStorage{
executions: make(map[ExecutionID]*dto.ExecutionRequest),
}
}
func (s *localExecutionStorage) Add(id ExecutionID, executionRequest *dto.ExecutionRequest) {
s.Lock()
defer s.Unlock()
s.executions[id] = executionRequest
}
func (s *localExecutionStorage) Pop(id ExecutionID) (*dto.ExecutionRequest, bool) {
s.Lock()
defer s.Unlock()
request, ok := s.executions[id]
delete(s.executions, id)
return request, ok
}
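// exampleExecutionStorageUsage is a minimal sketch (the function name and parameters are chosen for
// illustration) of the Add/Pop contract: the first Pop returns the stored request and removes it,
// so a second Pop with the same id reports ok == false.
func exampleExecutionStorageUsage(id ExecutionID, request *dto.ExecutionRequest) bool {
	storage := NewLocalExecutionStorage()
	storage.Add(id, request)
	if _, ok := storage.Pop(id); !ok {
		return false // unexpected: the execution was just added
	}
	_, ok := storage.Pop(id)
	return ok // false, because the first Pop already deleted the execution
}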

View File

@ -0,0 +1,43 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package runner
import (
time "time"
mock "github.com/stretchr/testify/mock"
)
// InactivityTimerMock is an autogenerated mock type for the InactivityTimer type
type InactivityTimerMock struct {
mock.Mock
}
// ResetTimeout provides a mock function with given fields:
func (_m *InactivityTimerMock) ResetTimeout() {
_m.Called()
}
// SetupTimeout provides a mock function with given fields: duration
func (_m *InactivityTimerMock) SetupTimeout(duration time.Duration) {
_m.Called(duration)
}
// StopTimeout provides a mock function with given fields:
func (_m *InactivityTimerMock) StopTimeout() {
_m.Called()
}
// TimeoutPassed provides a mock function with given fields:
func (_m *InactivityTimerMock) TimeoutPassed() bool {
ret := _m.Called()
var r0 bool
if rf, ok := ret.Get(0).(func() bool); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(bool)
}
return r0
}

406
internal/runner/manager.go Normal file
View File

@ -0,0 +1,406 @@
package runner
import (
"context"
"errors"
"fmt"
"github.com/google/uuid"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/sirupsen/logrus"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/logging"
"strconv"
"strings"
"time"
)
var (
log = logging.GetLogger("runner")
ErrUnknownExecutionEnvironment = errors.New("execution environment not found")
ErrNoRunnersAvailable = errors.New("no runners available for this execution environment")
ErrRunnerNotFound = errors.New("no runner found with this id")
ErrorUpdatingExecutionEnvironment = errors.New("errors occurred when updating environment")
ErrorInvalidJobID = errors.New("invalid job id")
)
type EnvironmentID int
func NewEnvironmentID(id string) (EnvironmentID, error) {
environment, err := strconv.Atoi(id)
return EnvironmentID(environment), err
}
func (e EnvironmentID) toString() string {
return strconv.Itoa(int(e))
}
type NomadJobID string
// Manager keeps track of the used and unused runners of all execution environments in order to provide unused
// runners to new clients and ensure no runner is used twice.
type Manager interface {
// CreateOrUpdateEnvironment creates the given environment if it does not exist. Otherwise, it updates
// the existing environment and all runners. Iff a new Environment has been created, it returns true.
// Iff scale is true, runners are created until the desiredIdleRunnersCount is reached.
CreateOrUpdateEnvironment(id EnvironmentID, desiredIdleRunnersCount uint, templateJob *nomadApi.Job,
scale bool) (bool, error)
// Claim returns a new runner. The runner is deleted after duration seconds if duration is not 0.
// It makes sure that the runner is not in use yet and returns an error if no runner could be provided.
Claim(id EnvironmentID, duration int) (Runner, error)
// Get returns the used runner with the given runnerId.
// If no runner with the given runnerId is currently used, it returns an error.
Get(runnerID string) (Runner, error)
// Return signals that the runner is no longer used by the caller and can be claimed by someone else.
// The runner is deleted or cleaned up for reuse depending on the used executor.
Return(r Runner) error
// Load fetches all already created runners from the executor and registers them.
// It should be called during the startup process (e.g. on creation of the Manager).
Load()
}
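// exampleClaimAndReturn is a minimal sketch (the function name, environment ID and duration are
// placeholders) of the intended call sequence against the Manager: claim a runner for 60 seconds,
// look it up again by its ID while it is in use and return it once the caller is done with it.
func exampleClaimAndReturn(m Manager) error {
	claimedRunner, err := m.Claim(EnvironmentID(0), 60)
	if err != nil {
		return fmt.Errorf("claiming a runner failed: %w", err)
	}
	if _, err := m.Get(claimedRunner.ID()); err != nil {
		return fmt.Errorf("looking up the claimed runner failed: %w", err)
	}
	return m.Return(claimedRunner)
}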
type NomadRunnerManager struct {
apiClient nomad.ExecutorAPI
environments NomadEnvironmentStorage
usedRunners Storage
}
// NewNomadRunnerManager creates a new runner manager that keeps track of all runners.
// It uses the apiClient for all requests and runs a background task to keep the runners in sync with Nomad.
// If you cancel the context, the background synchronization is stopped.
func NewNomadRunnerManager(apiClient nomad.ExecutorAPI, ctx context.Context) *NomadRunnerManager {
m := &NomadRunnerManager{
apiClient,
NewLocalNomadEnvironmentStorage(),
NewLocalRunnerStorage(),
}
go m.keepRunnersSynced(ctx)
return m
}
type NomadEnvironment struct {
environmentID EnvironmentID
idleRunners Storage
desiredIdleRunnersCount uint
templateJob *nomadApi.Job
}
func (j *NomadEnvironment) ID() EnvironmentID {
return j.environmentID
}
func (m *NomadRunnerManager) CreateOrUpdateEnvironment(id EnvironmentID, desiredIdleRunnersCount uint,
templateJob *nomadApi.Job, scale bool) (bool, error) {
_, ok := m.environments.Get(id)
if !ok {
return true, m.registerEnvironment(id, desiredIdleRunnersCount, templateJob, scale)
}
return false, m.updateEnvironment(id, desiredIdleRunnersCount, templateJob, scale)
}
func (m *NomadRunnerManager) registerEnvironment(environmentID EnvironmentID, desiredIdleRunnersCount uint,
templateJob *nomadApi.Job, scale bool) error {
m.environments.Add(&NomadEnvironment{
environmentID,
NewLocalRunnerStorage(),
desiredIdleRunnersCount,
templateJob,
})
if scale {
err := m.scaleEnvironment(environmentID)
if err != nil {
return fmt.Errorf("couldn't upscale environment %w", err)
}
}
return nil
}
// updateEnvironment updates all runners of the specified environment. This is required as attributes like the
// CPULimit or MemoryMB could be changed in the new template job.
func (m *NomadRunnerManager) updateEnvironment(id EnvironmentID, desiredIdleRunnersCount uint,
newTemplateJob *nomadApi.Job, scale bool) error {
environment, ok := m.environments.Get(id)
if !ok {
return ErrUnknownExecutionEnvironment
}
environment.desiredIdleRunnersCount = desiredIdleRunnersCount
environment.templateJob = newTemplateJob
err := nomad.SetMetaConfigValue(newTemplateJob, nomad.ConfigMetaPoolSizeKey,
strconv.Itoa(int(desiredIdleRunnersCount)))
if err != nil {
return fmt.Errorf("update environment couldn't update template environment: %w", err)
}
err = m.updateRunnerSpecs(id, newTemplateJob)
if err != nil {
return err
}
if scale {
err = m.scaleEnvironment(id)
}
return err
}
func (m *NomadRunnerManager) updateRunnerSpecs(environmentID EnvironmentID, templateJob *nomadApi.Job) error {
runners, err := m.apiClient.LoadRunnerIDs(environmentID.toString())
if err != nil {
return fmt.Errorf("update environment couldn't load runners: %w", err)
}
var occurredError error
for _, id := range runners {
// avoid taking the address of the loop variable
runnerID := id
updatedRunnerJob := *templateJob
updatedRunnerJob.ID = &runnerID
updatedRunnerJob.Name = &runnerID
err := m.apiClient.RegisterRunnerJob(&updatedRunnerJob)
if err != nil {
if occurredError == nil {
occurredError = ErrorUpdatingExecutionEnvironment
}
occurredError = fmt.Errorf("%w; new api error for runner %s - %v", occurredError, id, err)
}
}
return occurredError
}
func (m *NomadRunnerManager) Claim(environmentID EnvironmentID, duration int) (Runner, error) {
environment, ok := m.environments.Get(environmentID)
if !ok {
return nil, ErrUnknownExecutionEnvironment
}
runner, ok := environment.idleRunners.Sample()
if !ok {
return nil, ErrNoRunnersAvailable
}
m.usedRunners.Add(runner)
err := m.apiClient.MarkRunnerAsUsed(runner.ID(), duration)
if err != nil {
return nil, fmt.Errorf("can't mark runner as used: %w", err)
}
runner.SetupTimeout(time.Duration(duration) * time.Second)
err = m.createRunner(environment)
if err != nil {
log.WithError(err).WithField("environmentID", environmentID).Error("Couldn't create new runner for claimed one")
}
return runner, nil
}
func (m *NomadRunnerManager) Get(runnerID string) (Runner, error) {
runner, ok := m.usedRunners.Get(runnerID)
if !ok {
return nil, ErrRunnerNotFound
}
return runner, nil
}
func (m *NomadRunnerManager) Return(r Runner) error {
r.StopTimeout()
err := m.apiClient.DeleteRunner(r.ID())
if err != nil {
return fmt.Errorf("error deleting runner in Nomad: %w", err)
}
m.usedRunners.Delete(r.ID())
return nil
}
func (m *NomadRunnerManager) Load() {
for _, environment := range m.environments.List() {
environmentLogger := log.WithField("environmentID", environment.ID())
runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID().toString())
if err != nil {
environmentLogger.WithError(err).Warn("Error fetching the runner jobs")
}
for _, job := range runnerJobs {
m.loadSingleJob(job, environmentLogger, environment)
}
err = m.scaleEnvironment(environment.ID())
if err != nil {
environmentLogger.WithError(err).Error("Couldn't scale environment")
}
}
}
func (m *NomadRunnerManager) loadSingleJob(job *nomadApi.Job, environmentLogger *logrus.Entry,
environment *NomadEnvironment,
) {
configTaskGroup := nomad.FindConfigTaskGroup(job)
if configTaskGroup == nil {
environmentLogger.Infof("Couldn't find config task group in job %s, skipping ...", *job.ID)
return
}
isUsed := configTaskGroup.Meta[nomad.ConfigMetaUsedKey] == nomad.ConfigMetaUsedValue
portMappings, err := m.apiClient.LoadRunnerPortMappings(*job.ID)
if err != nil {
environmentLogger.WithError(err).Warn("Error loading runner portMappings")
return
}
newJob := NewNomadJob(*job.ID, portMappings, m.apiClient, m)
if isUsed {
m.usedRunners.Add(newJob)
timeout, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaTimeoutKey])
if err != nil {
environmentLogger.WithError(err).Warn("Error loading timeout from meta values")
} else {
newJob.SetupTimeout(time.Duration(timeout) * time.Second)
}
} else {
environment.idleRunners.Add(newJob)
}
}
func (m *NomadRunnerManager) keepRunnersSynced(ctx context.Context) {
retries := 0
for ctx.Err() == nil {
err := m.apiClient.WatchAllocations(ctx, m.onAllocationAdded, m.onAllocationStopped)
retries++
log.WithError(err).Errorf("Stopped updating the runners! Retry %v", retries)
<-time.After(time.Second)
}
}
func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation) {
log.WithField("id", alloc.JobID).Debug("Runner started")
if IsEnvironmentTemplateID(alloc.JobID) {
return
}
environmentID, err := EnvironmentIDFromJobID(alloc.JobID)
if err != nil {
log.WithError(err).Warn("Allocation could not be added")
return
}
job, ok := m.environments.Get(environmentID)
if ok {
var mappedPorts []nomadApi.PortMapping
if alloc.AllocatedResources != nil {
mappedPorts = alloc.AllocatedResources.Shared.Ports
}
job.idleRunners.Add(NewNomadJob(alloc.JobID, mappedPorts, m.apiClient, m))
}
}
func (m *NomadRunnerManager) onAllocationStopped(alloc *nomadApi.Allocation) {
log.WithField("id", alloc.JobID).Debug("Runner stopped")
environmentID, err := EnvironmentIDFromJobID(alloc.JobID)
if err != nil {
log.WithError(err).Warn("Stopped allocation can not be handled")
return
}
m.usedRunners.Delete(alloc.JobID)
job, ok := m.environments.Get(environmentID)
if ok {
job.idleRunners.Delete(alloc.JobID)
}
}
// scaleEnvironment makes sure that the number of idle runners is at least the desiredIdleRunnersCount.
func (m *NomadRunnerManager) scaleEnvironment(id EnvironmentID) error {
environment, ok := m.environments.Get(id)
if !ok {
return ErrUnknownExecutionEnvironment
}
required := int(environment.desiredIdleRunnersCount) - environment.idleRunners.Length()
if required > 0 {
return m.createRunners(environment, uint(required))
} else {
return m.removeRunners(environment, uint(-required))
}
}
func (m *NomadRunnerManager) createRunners(environment *NomadEnvironment, count uint) error {
log.WithField("runnersRequired", count).WithField("id", environment.ID()).Debug("Creating new runners")
for i := 0; i < int(count); i++ {
err := m.createRunner(environment)
if err != nil {
return fmt.Errorf("couldn't create new runner: %w", err)
}
}
return nil
}
func (m *NomadRunnerManager) createRunner(environment *NomadEnvironment) error {
newUUID, err := uuid.NewUUID()
if err != nil {
return fmt.Errorf("failed generating runner id: %w", err)
}
newRunnerID := RunnerJobID(environment.ID(), newUUID.String())
template := *environment.templateJob
template.ID = &newRunnerID
template.Name = &newRunnerID
err = m.apiClient.RegisterRunnerJob(&template)
if err != nil {
return fmt.Errorf("error registering new runner job: %w", err)
}
return nil
}
func (m *NomadRunnerManager) removeRunners(environment *NomadEnvironment, count uint) error {
log.WithField("runnersToDelete", count).WithField("id", environment.ID()).Debug("Removing idle runners")
for i := 0; i < int(count); i++ {
r, ok := environment.idleRunners.Sample()
if !ok {
return fmt.Errorf("could not delete expected idle runner: %w", ErrRunnerNotFound)
}
err := m.apiClient.DeleteRunner(r.ID())
if err != nil {
return fmt.Errorf("could not delete expected Nomad idle runner: %w", err)
}
}
return nil
}
// RunnerJobID returns the nomad job id of the runner with the given environmentID and id.
func RunnerJobID(environmentID EnvironmentID, id string) string {
return fmt.Sprintf("%d-%s", environmentID, id)
}
// EnvironmentIDFromJobID returns the environment id that is part of the passed job id.
func EnvironmentIDFromJobID(jobID string) (EnvironmentID, error) {
parts := strings.Split(jobID, "-")
if len(parts) == 0 {
return 0, fmt.Errorf("empty job id: %w", ErrorInvalidJobID)
}
environmentID, err := strconv.Atoi(parts[0])
if err != nil {
return 0, fmt.Errorf("invalid environment id par %v: %w", err, ErrorInvalidJobID)
}
return EnvironmentID(environmentID), nil
}
const templateJobNameParts = 2
// TemplateJobID returns the id of the template job for the environment with the given id.
func TemplateJobID(id EnvironmentID) string {
return fmt.Sprintf("%s-%d", nomad.TemplateJobPrefix, id)
}
// IsEnvironmentTemplateID checks if the passed job id belongs to a template job.
func IsEnvironmentTemplateID(jobID string) bool {
parts := strings.Split(jobID, "-")
return len(parts) == templateJobNameParts && parts[0] == nomad.TemplateJobPrefix
}
// EnvironmentIDFromTemplateJobID returns the environment id that is part of the passed template job id.
func EnvironmentIDFromTemplateJobID(id string) (string, error) {
parts := strings.Split(id, "-")
if len(parts) < templateJobNameParts {
return "", fmt.Errorf("invalid template job id: %w", ErrorInvalidJobID)
}
return parts[1], nil
}
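// --- Illustrative usage sketch (not part of this commit) ---
// The Manager documentation above describes the runner lifecycle. The following
// hypothetical helper shows how a caller is expected to combine
// CreateOrUpdateEnvironment, Claim and Return; apiClient and templateJob are
// assumed to be provided by the caller.
func exampleRunnerLifecycle(apiClient nomad.ExecutorAPI, templateJob *nomadApi.Job) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
manager := NewNomadRunnerManager(apiClient, ctx)
// Register environment 0 with five desired idle runners and scale immediately.
if _, err := manager.CreateOrUpdateEnvironment(EnvironmentID(0), 5, templateJob, true); err != nil {
return err
}
// Claim a runner with a 60 second inactivity timeout and hand it back afterwards.
r, err := manager.Claim(EnvironmentID(0), 60)
if err != nil {
return err
}
return manager.Return(r)
}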

View File

@ -0,0 +1,99 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package runner
import (
api "github.com/hashicorp/nomad/api"
mock "github.com/stretchr/testify/mock"
)
// ManagerMock is an autogenerated mock type for the Manager type
type ManagerMock struct {
mock.Mock
}
// Claim provides a mock function with given fields: id, duration
func (_m *ManagerMock) Claim(id EnvironmentID, duration int) (Runner, error) {
ret := _m.Called(id, duration)
var r0 Runner
if rf, ok := ret.Get(0).(func(EnvironmentID, int) Runner); ok {
r0 = rf(id, duration)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(Runner)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(EnvironmentID, int) error); ok {
r1 = rf(id, duration)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// CreateOrUpdateEnvironment provides a mock function with given fields: id, desiredIdleRunnersCount, templateJob, scale
func (_m *ManagerMock) CreateOrUpdateEnvironment(id EnvironmentID, desiredIdleRunnersCount uint, templateJob *api.Job, scale bool) (bool, error) {
ret := _m.Called(id, desiredIdleRunnersCount, templateJob, scale)
var r0 bool
if rf, ok := ret.Get(0).(func(EnvironmentID, uint, *api.Job, bool) bool); ok {
r0 = rf(id, desiredIdleRunnersCount, templateJob, scale)
} else {
r0 = ret.Get(0).(bool)
}
var r1 error
if rf, ok := ret.Get(1).(func(EnvironmentID, uint, *api.Job, bool) error); ok {
r1 = rf(id, desiredIdleRunnersCount, templateJob, scale)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Get provides a mock function with given fields: runnerID
func (_m *ManagerMock) Get(runnerID string) (Runner, error) {
ret := _m.Called(runnerID)
var r0 Runner
if rf, ok := ret.Get(0).(func(string) Runner); ok {
r0 = rf(runnerID)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(Runner)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(runnerID)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Load provides a mock function with given fields:
func (_m *ManagerMock) Load() {
_m.Called()
}
// Return provides a mock function with given fields: r
func (_m *ManagerMock) Return(r Runner) error {
ret := _m.Called(r)
var r0 error
if rf, ok := ret.Get(0).(func(Runner) error); ok {
r0 = rf(r)
} else {
r0 = ret.Error(0)
}
return r0
}

View File

@ -0,0 +1,347 @@
package runner
import (
"context"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/sirupsen/logrus"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests/helpers"
"strconv"
"testing"
"time"
)
const (
defaultDesiredRunnersCount uint = 5
)
func TestGetNextRunnerTestSuite(t *testing.T) {
suite.Run(t, new(ManagerTestSuite))
}
type ManagerTestSuite struct {
suite.Suite
apiMock *nomad.ExecutorAPIMock
nomadRunnerManager *NomadRunnerManager
exerciseRunner Runner
}
func (s *ManagerTestSuite) SetupTest() {
s.apiMock = &nomad.ExecutorAPIMock{}
mockRunnerQueries(s.apiMock, []string{})
// Immediately cancelled context so the background synchronization does not run; tests start the update process manually where needed.
ctx, cancel := context.WithCancel(context.Background())
cancel()
s.nomadRunnerManager = NewNomadRunnerManager(s.apiMock, ctx)
s.exerciseRunner = NewRunner(tests.DefaultRunnerID, s.nomadRunnerManager)
s.registerDefaultEnvironment()
}
func mockRunnerQueries(apiMock *nomad.ExecutorAPIMock, returnedRunnerIds []string) {
// reset expected calls to allow new mocked return values
apiMock.ExpectedCalls = []*mock.Call{}
call := apiMock.On("WatchAllocations", mock.Anything, mock.Anything, mock.Anything)
call.Run(func(args mock.Arguments) {
<-time.After(10 * time.Minute) // 10 minutes is the default test timeout
call.ReturnArguments = mock.Arguments{nil}
})
apiMock.On("LoadEnvironmentJobs").Return([]*nomadApi.Job{}, nil)
apiMock.On("MarkRunnerAsUsed", mock.AnythingOfType("string"), mock.AnythingOfType("int")).Return(nil)
apiMock.On("LoadRunnerIDs", tests.DefaultJobID).Return(returnedRunnerIds, nil)
apiMock.On("JobScale", tests.DefaultJobID).Return(uint(len(returnedRunnerIds)), nil)
apiMock.On("SetJobScale", tests.DefaultJobID, mock.AnythingOfType("uint"), "Runner Requested").Return(nil)
apiMock.On("RegisterRunnerJob", mock.Anything).Return(nil)
apiMock.On("MonitorEvaluation", mock.Anything, mock.Anything).Return(nil)
}
func (s *ManagerTestSuite) registerDefaultEnvironment() {
err := s.nomadRunnerManager.registerEnvironment(defaultEnvironmentID, 0, &nomadApi.Job{}, true)
s.Require().NoError(err)
}
func (s *ManagerTestSuite) AddIdleRunnerForDefaultEnvironment(r Runner) {
job, _ := s.nomadRunnerManager.environments.Get(defaultEnvironmentID)
job.idleRunners.Add(r)
}
func (s *ManagerTestSuite) waitForRunnerRefresh() {
<-time.After(100 * time.Millisecond)
}
func (s *ManagerTestSuite) TestRegisterEnvironmentAddsNewJob() {
err := s.nomadRunnerManager.
registerEnvironment(anotherEnvironmentID, defaultDesiredRunnersCount, &nomadApi.Job{}, true)
s.Require().NoError(err)
job, ok := s.nomadRunnerManager.environments.Get(anotherEnvironmentID)
s.True(ok)
s.NotNil(job)
}
func (s *ManagerTestSuite) TestClaimReturnsNotFoundErrorIfEnvironmentNotFound() {
runner, err := s.nomadRunnerManager.Claim(EnvironmentID(42), defaultInactivityTimeout)
s.Nil(runner)
s.Equal(ErrUnknownExecutionEnvironment, err)
}
func (s *ManagerTestSuite) TestClaimReturnsRunnerIfAvailable() {
s.AddIdleRunnerForDefaultEnvironment(s.exerciseRunner)
receivedRunner, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.NoError(err)
s.Equal(s.exerciseRunner, receivedRunner)
}
func (s *ManagerTestSuite) TestClaimReturnsErrorIfNoRunnerAvailable() {
s.waitForRunnerRefresh()
runner, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.Nil(runner)
s.Equal(ErrNoRunnersAvailable, err)
}
func (s *ManagerTestSuite) TestClaimReturnsNoRunnerOfDifferentEnvironment() {
s.AddIdleRunnerForDefaultEnvironment(s.exerciseRunner)
receivedRunner, err := s.nomadRunnerManager.Claim(anotherEnvironmentID, defaultInactivityTimeout)
s.Nil(receivedRunner)
s.Error(err)
}
func (s *ManagerTestSuite) TestClaimDoesNotReturnTheSameRunnerTwice() {
s.AddIdleRunnerForDefaultEnvironment(s.exerciseRunner)
s.AddIdleRunnerForDefaultEnvironment(NewRunner(tests.AnotherRunnerID, s.nomadRunnerManager))
firstReceivedRunner, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.NoError(err)
secondReceivedRunner, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.NoError(err)
s.NotEqual(firstReceivedRunner, secondReceivedRunner)
}
func (s *ManagerTestSuite) TestClaimThrowsAnErrorIfNoRunnersAvailable() {
receivedRunner, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.Nil(receivedRunner)
s.Error(err)
}
func (s *ManagerTestSuite) TestClaimAddsRunnerToUsedRunners() {
s.AddIdleRunnerForDefaultEnvironment(s.exerciseRunner)
receivedRunner, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.Require().NoError(err)
savedRunner, ok := s.nomadRunnerManager.usedRunners.Get(receivedRunner.ID())
s.True(ok)
s.Equal(savedRunner, receivedRunner)
}
func (s *ManagerTestSuite) TestTwoClaimsAddExactlyTwoRunners() {
s.AddIdleRunnerForDefaultEnvironment(s.exerciseRunner)
s.AddIdleRunnerForDefaultEnvironment(NewRunner(tests.AnotherRunnerID, s.nomadRunnerManager))
_, err := s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.Require().NoError(err)
_, err = s.nomadRunnerManager.Claim(defaultEnvironmentID, defaultInactivityTimeout)
s.Require().NoError(err)
s.apiMock.AssertNumberOfCalls(s.T(), "RegisterRunnerJob", 2)
}
func (s *ManagerTestSuite) TestGetReturnsRunnerIfRunnerIsUsed() {
s.nomadRunnerManager.usedRunners.Add(s.exerciseRunner)
savedRunner, err := s.nomadRunnerManager.Get(s.exerciseRunner.ID())
s.NoError(err)
s.Equal(savedRunner, s.exerciseRunner)
}
func (s *ManagerTestSuite) TestGetReturnsErrorIfRunnerNotFound() {
savedRunner, err := s.nomadRunnerManager.Get(tests.DefaultRunnerID)
s.Nil(savedRunner)
s.Error(err)
}
func (s *ManagerTestSuite) TestReturnRemovesRunnerFromUsedRunners() {
s.apiMock.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil)
s.nomadRunnerManager.usedRunners.Add(s.exerciseRunner)
err := s.nomadRunnerManager.Return(s.exerciseRunner)
s.Nil(err)
_, ok := s.nomadRunnerManager.usedRunners.Get(s.exerciseRunner.ID())
s.False(ok)
}
func (s *ManagerTestSuite) TestReturnCallsDeleteRunnerApiMethod() {
s.apiMock.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil)
err := s.nomadRunnerManager.Return(s.exerciseRunner)
s.Nil(err)
s.apiMock.AssertCalled(s.T(), "DeleteRunner", s.exerciseRunner.ID())
}
func (s *ManagerTestSuite) TestReturnReturnsErrorWhenApiCallFailed() {
s.apiMock.On("DeleteRunner", mock.AnythingOfType("string")).Return(tests.ErrDefault)
err := s.nomadRunnerManager.Return(s.exerciseRunner)
s.Error(err)
}
func (s *ManagerTestSuite) TestUpdateRunnersLogsErrorFromWatchAllocation() {
logger, hook := test.NewNullLogger()
log = logger.WithField("pkg", "runner")
modifyMockedCall(s.apiMock, "WatchAllocations", func(call *mock.Call) {
call.Run(func(args mock.Arguments) {
call.ReturnArguments = mock.Arguments{tests.ErrDefault}
})
})
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go s.nomadRunnerManager.keepRunnersSynced(ctx)
<-time.After(10 * time.Millisecond)
s.Require().Equal(1, len(hook.Entries))
s.Equal(logrus.ErrorLevel, hook.LastEntry().Level)
s.Equal(hook.LastEntry().Data[logrus.ErrorKey], tests.ErrDefault)
}
func (s *ManagerTestSuite) TestUpdateRunnersAddsIdleRunner() {
allocation := &nomadApi.Allocation{ID: tests.DefaultRunnerID}
environment, ok := s.nomadRunnerManager.environments.Get(defaultEnvironmentID)
s.Require().True(ok)
allocation.JobID = environment.environmentID.toString()
_, ok = environment.idleRunners.Get(allocation.ID)
s.Require().False(ok)
modifyMockedCall(s.apiMock, "WatchAllocations", func(call *mock.Call) {
call.Run(func(args mock.Arguments) {
onCreate, ok := args.Get(1).(nomad.AllocationProcessor)
s.Require().True(ok)
onCreate(allocation)
call.ReturnArguments = mock.Arguments{nil}
})
})
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go s.nomadRunnerManager.keepRunnersSynced(ctx)
<-time.After(10 * time.Millisecond)
_, ok = environment.idleRunners.Get(allocation.JobID)
s.True(ok)
}
func (s *ManagerTestSuite) TestUpdateRunnersRemovesIdleAndUsedRunner() {
allocation := &nomadApi.Allocation{JobID: tests.DefaultJobID}
environment, ok := s.nomadRunnerManager.environments.Get(defaultEnvironmentID)
s.Require().True(ok)
testRunner := NewRunner(allocation.JobID, s.nomadRunnerManager)
environment.idleRunners.Add(testRunner)
s.nomadRunnerManager.usedRunners.Add(testRunner)
modifyMockedCall(s.apiMock, "WatchAllocations", func(call *mock.Call) {
call.Run(func(args mock.Arguments) {
onDelete, ok := args.Get(2).(nomad.AllocationProcessor)
s.Require().True(ok)
onDelete(allocation)
call.ReturnArguments = mock.Arguments{nil}
})
})
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go s.nomadRunnerManager.keepRunnersSynced(ctx)
<-time.After(10 * time.Millisecond)
_, ok = environment.idleRunners.Get(allocation.JobID)
s.False(ok)
_, ok = s.nomadRunnerManager.usedRunners.Get(allocation.JobID)
s.False(ok)
}
func (s *ManagerTestSuite) TestUpdateEnvironmentRemovesIdleRunnersWhenScalingDown() {
_, job := helpers.CreateTemplateJob()
initialRunners := uint(40)
updatedRunners := uint(10)
err := s.nomadRunnerManager.registerEnvironment(anotherEnvironmentID, initialRunners, job, true)
s.Require().NoError(err)
s.apiMock.AssertNumberOfCalls(s.T(), "RegisterRunnerJob", int(initialRunners))
environment, ok := s.nomadRunnerManager.environments.Get(anotherEnvironmentID)
s.Require().True(ok)
for i := 0; i < int(initialRunners); i++ {
environment.idleRunners.Add(NewRunner("active-runner-"+strconv.Itoa(i), s.nomadRunnerManager))
}
s.apiMock.On("LoadRunnerIDs", anotherEnvironmentID.toString()).Return([]string{}, nil)
s.apiMock.On("DeleteRunner", mock.AnythingOfType("string")).Return(nil)
err = s.nomadRunnerManager.updateEnvironment(tests.AnotherEnvironmentIDAsInteger, updatedRunners, job, true)
s.Require().NoError(err)
s.apiMock.AssertNumberOfCalls(s.T(), "DeleteRunner", int(initialRunners-updatedRunners))
}
func modifyMockedCall(apiMock *nomad.ExecutorAPIMock, method string, modifier func(call *mock.Call)) {
for _, c := range apiMock.ExpectedCalls {
if c.Method == method {
modifier(c)
}
}
}
func (s *ManagerTestSuite) TestOnAllocationAdded() {
s.registerDefaultEnvironment()
s.Run("does not add environment template id job", func() {
alloc := &nomadApi.Allocation{JobID: TemplateJobID(tests.DefaultEnvironmentIDAsInteger)}
s.nomadRunnerManager.onAllocationAdded(alloc)
job, ok := s.nomadRunnerManager.environments.Get(tests.DefaultEnvironmentIDAsInteger)
s.True(ok)
s.Zero(job.idleRunners.Length())
})
s.Run("does not panic when environment id cannot be parsed", func() {
alloc := &nomadApi.Allocation{JobID: ""}
s.NotPanics(func() {
s.nomadRunnerManager.onAllocationAdded(alloc)
})
})
s.Run("does not panic when environment does not exist", func() {
nonExistentEnvironment := EnvironmentID(1234)
_, ok := s.nomadRunnerManager.environments.Get(nonExistentEnvironment)
s.Require().False(ok)
alloc := &nomadApi.Allocation{JobID: RunnerJobID(nonExistentEnvironment, "1-1-1-1")}
s.NotPanics(func() {
s.nomadRunnerManager.onAllocationAdded(alloc)
})
})
s.Run("adds correct job", func() {
s.Run("without allocated resources", func() {
alloc := &nomadApi.Allocation{
JobID: tests.DefaultJobID,
AllocatedResources: nil,
}
s.nomadRunnerManager.onAllocationAdded(alloc)
job, ok := s.nomadRunnerManager.environments.Get(tests.DefaultEnvironmentIDAsInteger)
s.True(ok)
runner, ok := job.idleRunners.Get(tests.DefaultJobID)
s.True(ok)
nomadJob, ok := runner.(*NomadJob)
s.True(ok)
s.Equal(nomadJob.id, tests.DefaultJobID)
s.Empty(nomadJob.portMappings)
})
s.Run("with mapped ports", func() {
alloc := &nomadApi.Allocation{
JobID: tests.DefaultJobID,
AllocatedResources: &nomadApi.AllocatedResources{
Shared: nomadApi.AllocatedSharedResources{Ports: tests.DefaultPortMappings},
},
}
s.nomadRunnerManager.onAllocationAdded(alloc)
job, ok := s.nomadRunnerManager.environments.Get(tests.DefaultEnvironmentIDAsInteger)
s.True(ok)
runner, ok := job.idleRunners.Get(tests.DefaultJobID)
s.True(ok)
nomadJob, ok := runner.(*NomadJob)
s.True(ok)
s.Equal(nomadJob.id, tests.DefaultJobID)
s.Equal(nomadJob.portMappings, tests.DefaultPortMappings)
})
})
}

View File

@ -0,0 +1,75 @@
package runner
import (
"sync"
)
// NomadEnvironmentStorage is an interface for storing Nomad environments.
type NomadEnvironmentStorage interface {
// List returns all environments stored in this storage.
List() []*NomadEnvironment
// Add adds an environment to the storage.
// It overwrites the old environment if one with the same id was already stored.
Add(environment *NomadEnvironment)
// Get returns an environment from the storage.
// Iff the environment does not exist in the store, ok will be false.
Get(id EnvironmentID) (environment *NomadEnvironment, ok bool)
// Delete deletes the environment with the passed id from the storage. It does nothing if no environment with the id
// is present in the storage.
Delete(id EnvironmentID)
// Length returns the number of currently stored environments in the storage.
Length() int
}
// localNomadEnvironmentStorage stores NomadEnvironment objects in the local application memory.
type localNomadEnvironmentStorage struct {
sync.RWMutex
environments map[EnvironmentID]*NomadEnvironment
}
// NewLocalNomadEnvironmentStorage returns an empty localNomadEnvironmentStorage.
// This implementation stores the data in a thread-safe manner in the local application memory.
func NewLocalNomadEnvironmentStorage() *localNomadEnvironmentStorage {
return &localNomadEnvironmentStorage{
environments: make(map[EnvironmentID]*NomadEnvironment),
}
}
func (s *localNomadEnvironmentStorage) List() []*NomadEnvironment {
s.RLock()
defer s.RUnlock()
values := make([]*NomadEnvironment, 0, len(s.environments))
for _, v := range s.environments {
values = append(values, v)
}
return values
}
func (s *localNomadEnvironmentStorage) Add(environment *NomadEnvironment) {
s.Lock()
defer s.Unlock()
s.environments[environment.ID()] = environment
}
func (s *localNomadEnvironmentStorage) Get(id EnvironmentID) (environment *NomadEnvironment, ok bool) {
s.RLock()
defer s.RUnlock()
environment, ok = s.environments[id]
return
}
func (s *localNomadEnvironmentStorage) Delete(id EnvironmentID) {
s.Lock()
defer s.Unlock()
delete(s.environments, id)
}
func (s *localNomadEnvironmentStorage) Length() int {
s.RLock()
defer s.RUnlock()
return len(s.environments)
}
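// --- Illustrative usage sketch (not part of this commit) ---
// A hypothetical snippet showing the interplay of Add and Get that the
// interface comments above describe; the environment parameter is assumed to
// be provided by the caller.
func exampleEnvironmentStorageUsage(environment *NomadEnvironment) {
storage := NewLocalNomadEnvironmentStorage()
storage.Add(environment)
if stored, ok := storage.Get(environment.ID()); ok {
// The same environment is returned until it is deleted or overwritten.
_ = stored
}
}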

View File

@ -0,0 +1,76 @@
package runner
import (
nomadApi "github.com/hashicorp/nomad/api"
"github.com/stretchr/testify/suite"
"testing"
)
func TestEnvironmentStoreTestSuite(t *testing.T) {
suite.Run(t, new(EnvironmentStoreTestSuite))
}
type EnvironmentStoreTestSuite struct {
suite.Suite
environmentStorage *localNomadEnvironmentStorage
environment *NomadEnvironment
}
func (s *EnvironmentStoreTestSuite) SetupTest() {
s.environmentStorage = NewLocalNomadEnvironmentStorage()
s.environment = &NomadEnvironment{environmentID: defaultEnvironmentID}
}
func (s *EnvironmentStoreTestSuite) TestAddedEnvironmentCanBeRetrieved() {
s.environmentStorage.Add(s.environment)
retrievedEnvironment, ok := s.environmentStorage.Get(s.environment.ID())
s.True(ok, "A saved runner should be retrievable")
s.Equal(s.environment, retrievedEnvironment)
}
func (s *EnvironmentStoreTestSuite) TestEnvironmentWithSameIdOverwritesOldOne() {
otherEnvironmentWithSameID := &NomadEnvironment{environmentID: defaultEnvironmentID}
otherEnvironmentWithSameID.templateJob = &nomadApi.Job{}
s.NotEqual(s.environment, otherEnvironmentWithSameID)
s.environmentStorage.Add(s.environment)
s.environmentStorage.Add(otherEnvironmentWithSameID)
retrievedEnvironment, _ := s.environmentStorage.Get(s.environment.ID())
s.NotEqual(s.environment, retrievedEnvironment)
s.Equal(otherEnvironmentWithSameID, retrievedEnvironment)
}
func (s *EnvironmentStoreTestSuite) TestDeletedEnvironmentIsNotAccessible() {
s.environmentStorage.Add(s.environment)
s.environmentStorage.Delete(s.environment.ID())
retrievedEnvironment, ok := s.environmentStorage.Get(s.environment.ID())
s.Nil(retrievedEnvironment)
s.False(ok, "A deleted environment should not be accessible")
}
func (s *EnvironmentStoreTestSuite) TestLenOfEmptyPoolIsZero() {
s.Equal(0, s.environmentStorage.Length())
}
func (s *EnvironmentStoreTestSuite) TestLenChangesOnStoreContentChange() {
s.Run("len increases when environment is added", func() {
s.environmentStorage.Add(s.environment)
s.Equal(1, s.environmentStorage.Length())
})
s.Run("len does not increase when environment with same id is added", func() {
s.environmentStorage.Add(s.environment)
s.Equal(1, s.environmentStorage.Length())
})
s.Run("len increases again when different environment is added", func() {
anotherEnvironment := &NomadEnvironment{environmentID: anotherEnvironmentID}
s.environmentStorage.Add(anotherEnvironment)
s.Equal(2, s.environmentStorage.Length())
})
s.Run("len decreases when environment is deleted", func() {
s.environmentStorage.Delete(s.environment.ID())
s.Equal(1, s.environmentStorage.Length())
})
}

338
internal/runner/runner.go Normal file
View File

@ -0,0 +1,338 @@
package runner
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"io"
"strings"
"sync"
"time"
)
// ContextKey is the type for keys in a request context.
type ContextKey string
// ExecutionID is an id for an execution in a Runner.
type ExecutionID string
const (
// runnerContextKey is the key used to store runners in context.Context.
runnerContextKey ContextKey = "runner"
)
var (
ErrorFileCopyFailed = errors.New("file copy failed")
ErrorRunnerInactivityTimeout = errors.New("runner inactivity timeout exceeded")
)
// InactivityTimer is a wrapper around a timer that is used to delete a Runner after some time of inactivity.
type InactivityTimer interface {
// SetupTimeout starts the timeout after which the runner gets returned due to inactivity.
SetupTimeout(duration time.Duration)
// ResetTimeout resets the current timeout so that the runner is returned after the duration set in SetupTimeout, counted from now.
// It does not make an already expired timer run again.
ResetTimeout()
// StopTimeout stops the timeout but does not remove the runner.
StopTimeout()
// TimeoutPassed returns true if the timeout expired and false otherwise.
TimeoutPassed() bool
}
type TimerState uint8
const (
TimerInactive TimerState = 0
TimerRunning TimerState = 1
TimerExpired TimerState = 2
)
type InactivityTimerImplementation struct {
timer *time.Timer
duration time.Duration
state TimerState
runner Runner
manager Manager
sync.Mutex
}
func NewInactivityTimer(runner Runner, manager Manager) InactivityTimer {
return &InactivityTimerImplementation{
state: TimerInactive,
runner: runner,
manager: manager,
}
}
func (t *InactivityTimerImplementation) SetupTimeout(duration time.Duration) {
t.Lock()
defer t.Unlock()
// Stop old timer if present.
if t.timer != nil {
t.timer.Stop()
}
if duration == 0 {
t.state = TimerInactive
return
}
t.state = TimerRunning
t.duration = duration
t.timer = time.AfterFunc(duration, func() {
t.Lock()
t.state = TimerExpired
// The timer must be unlocked here already in order to avoid a deadlock with the call to StopTimeout in Manager.Return.
t.Unlock()
err := t.manager.Return(t.runner)
if err != nil {
log.WithError(err).WithField("id", t.runner.ID()).Warn("Returning runner after inactivity caused an error")
} else {
log.WithField("id", t.runner.ID()).Info("Returning runner due to inactivity timeout")
}
})
}
func (t *InactivityTimerImplementation) ResetTimeout() {
t.Lock()
defer t.Unlock()
if t.state != TimerRunning {
// The timer has already expired or been stopped. We don't want to restart it.
return
}
if t.timer.Stop() {
t.timer.Reset(t.duration)
} else {
log.Error("Timer is in state running but stopped. This should never happen")
}
}
func (t *InactivityTimerImplementation) StopTimeout() {
t.Lock()
defer t.Unlock()
if t.state != TimerRunning {
return
}
t.timer.Stop()
t.state = TimerInactive
}
func (t *InactivityTimerImplementation) TimeoutPassed() bool {
return t.state == TimerExpired
}
type Runner interface {
// ID returns the id of the runner.
ID() string
// MappedPorts returns the mapped ports of the runner.
MappedPorts() []*dto.MappedPort
ExecutionStorage
InactivityTimer
// ExecuteInteractively runs the given execution request and forwards from and to the given reader and writers.
// An ExitInfo is sent to the exit channel on command completion.
// Output from the runner is forwarded immediately.
ExecuteInteractively(
request *dto.ExecutionRequest,
stdin io.Reader,
stdout,
stderr io.Writer,
) (exit <-chan ExitInfo, cancel context.CancelFunc)
// UpdateFileSystem processes a dto.UpdateFileSystemRequest by first deleting each given dto.FilePath recursively
// and then copying each given dto.File to the runner.
UpdateFileSystem(request *dto.UpdateFileSystemRequest) error
}
// NomadJob is an abstraction to communicate with Nomad environments.
type NomadJob struct {
ExecutionStorage
InactivityTimer
id string
portMappings []nomadApi.PortMapping
api nomad.ExecutorAPI
}
// NewNomadJob creates a new NomadJob with the provided id.
func NewNomadJob(id string, portMappings []nomadApi.PortMapping,
apiClient nomad.ExecutorAPI, manager Manager,
) *NomadJob {
job := &NomadJob{
id: id,
portMappings: portMappings,
api: apiClient,
ExecutionStorage: NewLocalExecutionStorage(),
}
job.InactivityTimer = NewInactivityTimer(job, manager)
return job
}
func (r *NomadJob) ID() string {
return r.id
}
func (r *NomadJob) MappedPorts() []*dto.MappedPort {
ports := make([]*dto.MappedPort, 0, len(r.portMappings))
for _, portMapping := range r.portMappings {
ports = append(ports, &dto.MappedPort{
ExposedPort: uint(portMapping.To),
HostAddress: fmt.Sprintf("%s:%d", portMapping.HostIP, portMapping.Value),
})
}
return ports
}
type ExitInfo struct {
Code uint8
Err error
}
func (r *NomadJob) ExecuteInteractively(
request *dto.ExecutionRequest,
stdin io.Reader,
stdout, stderr io.Writer,
) (<-chan ExitInfo, context.CancelFunc) {
r.ResetTimeout()
command := request.FullCommand()
var ctx context.Context
var cancel context.CancelFunc
if request.TimeLimit == 0 {
ctx, cancel = context.WithCancel(context.Background())
} else {
ctx, cancel = context.WithTimeout(context.Background(), time.Duration(request.TimeLimit)*time.Second)
}
exit := make(chan ExitInfo)
go func() {
exitCode, err := r.api.ExecuteCommand(r.id, ctx, command, true, stdin, stdout, stderr)
if err == nil && r.TimeoutPassed() {
err = ErrorRunnerInactivityTimeout
}
exit <- ExitInfo{uint8(exitCode), err}
close(exit)
}()
return exit, cancel
}
func (r *NomadJob) UpdateFileSystem(copyRequest *dto.UpdateFileSystemRequest) error {
r.ResetTimeout()
var tarBuffer bytes.Buffer
if err := createTarArchiveForFiles(copyRequest.Copy, &tarBuffer); err != nil {
return err
}
fileDeletionCommand := fileDeletionCommand(copyRequest.Delete)
copyCommand := "tar --extract --absolute-names --verbose --file=/dev/stdin;"
updateFileCommand := (&dto.ExecutionRequest{Command: fileDeletionCommand + copyCommand}).FullCommand()
stdOut := bytes.Buffer{}
stdErr := bytes.Buffer{}
exitCode, err := r.api.ExecuteCommand(r.id, context.Background(), updateFileCommand, false,
&tarBuffer, &stdOut, &stdErr)
if err != nil {
return fmt.Errorf(
"%w: nomad error during file copy: %v",
nomad.ErrorExecutorCommunicationFailed,
err)
}
if exitCode != 0 {
return fmt.Errorf(
"%w: stderr output '%s' and stdout output '%s'",
ErrorFileCopyFailed,
stdErr.String(),
stdOut.String())
}
return nil
}
func createTarArchiveForFiles(filesToCopy []dto.File, w io.Writer) error {
tarWriter := tar.NewWriter(w)
for _, file := range filesToCopy {
if err := tarWriter.WriteHeader(tarHeader(file)); err != nil {
err := fmt.Errorf("error writing tar file header: %w", err)
log.
WithField("file", file).
Error(err)
return err
}
if _, err := tarWriter.Write(file.ByteContent()); err != nil {
err := fmt.Errorf("error writing tar file content: %w", err)
log.
WithField("file", file).
Error(err)
return err
}
}
if err := tarWriter.Close(); err != nil {
return fmt.Errorf("error closing tar writer: %w", err)
}
return nil
}
func fileDeletionCommand(pathsToDelete []dto.FilePath) string {
if len(pathsToDelete) == 0 {
return ""
}
command := "rm --recursive --force "
for _, filePath := range pathsToDelete {
// To avoid command injection, filenames need to be quoted.
// See https://unix.stackexchange.com/questions/347332/what-characters-need-to-be-escaped-in-files-without-quotes
// for details.
singleQuoteEscapedFileName := strings.ReplaceAll(filePath.Cleaned(), "'", "'\\''")
command += fmt.Sprintf("'%s' ", singleQuoteEscapedFileName)
}
command += ";"
return command
}
func tarHeader(file dto.File) *tar.Header {
if file.IsDirectory() {
return &tar.Header{
Typeflag: tar.TypeDir,
Name: file.CleanedPath(),
Mode: 0755,
}
} else {
return &tar.Header{
Typeflag: tar.TypeReg,
Name: file.CleanedPath(),
Mode: 0744,
Size: int64(len(file.Content)),
}
}
}
// MarshalJSON implements the json.Marshaler interface.
// This exports private attributes like the id too.
func (r *NomadJob) MarshalJSON() ([]byte, error) {
res, err := json.Marshal(struct {
ID string `json:"runnerId"`
}{
ID: r.ID(),
})
if err != nil {
return nil, fmt.Errorf("error marshaling Nomad job: %w", err)
}
return res, nil
}
// NewContext creates a context containing a runner.
func NewContext(ctx context.Context, runner Runner) context.Context {
return context.WithValue(ctx, runnerContextKey, runner)
}
// FromContext returns a runner from a context.
func FromContext(ctx context.Context) (Runner, bool) {
runner, ok := ctx.Value(runnerContextKey).(Runner)
return runner, ok
}
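// --- Illustrative usage sketch (not part of this commit) ---
// The Runner interface above explains that ExecuteInteractively forwards output
// immediately and reports completion on the exit channel. This hypothetical
// helper shows a typical call; the runner r is assumed to come from Manager.Claim.
func exampleExecute(r Runner) {
request := &dto.ExecutionRequest{Command: "echo Hello", TimeLimit: 10}
stdout := &bytes.Buffer{}
stderr := &bytes.Buffer{}
exit, cancel := r.ExecuteInteractively(request, strings.NewReader(""), stdout, stderr)
defer cancel()
// Block until the command finishes or the time limit cancels the context.
info := <-exit
log.WithError(info.Err).WithField("exitCode", info.Code).Info(stdout.String())
}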

View File

@ -0,0 +1,145 @@
// Code generated by mockery v0.0.0-dev. DO NOT EDIT.
package runner
import (
context "context"
io "io"
dto "gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
mock "github.com/stretchr/testify/mock"
time "time"
)
// RunnerMock is an autogenerated mock type for the Runner type
type RunnerMock struct {
mock.Mock
}
// Add provides a mock function with given fields: id, executionRequest
func (_m *RunnerMock) Add(id ExecutionID, executionRequest *dto.ExecutionRequest) {
_m.Called(id, executionRequest)
}
// ExecuteInteractively provides a mock function with given fields: request, stdin, stdout, stderr
func (_m *RunnerMock) ExecuteInteractively(request *dto.ExecutionRequest, stdin io.Reader, stdout io.Writer, stderr io.Writer) (<-chan ExitInfo, context.CancelFunc) {
ret := _m.Called(request, stdin, stdout, stderr)
var r0 <-chan ExitInfo
if rf, ok := ret.Get(0).(func(*dto.ExecutionRequest, io.Reader, io.Writer, io.Writer) <-chan ExitInfo); ok {
r0 = rf(request, stdin, stdout, stderr)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan ExitInfo)
}
}
var r1 context.CancelFunc
if rf, ok := ret.Get(1).(func(*dto.ExecutionRequest, io.Reader, io.Writer, io.Writer) context.CancelFunc); ok {
r1 = rf(request, stdin, stdout, stderr)
} else {
if ret.Get(1) != nil {
r1 = ret.Get(1).(context.CancelFunc)
}
}
return r0, r1
}
// Id provides a mock function with given fields:
func (_m *RunnerMock) ID() string {
ret := _m.Called()
var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(string)
}
return r0
}
// MappedPorts provides a mock function with given fields:
func (_m *RunnerMock) MappedPorts() []*dto.MappedPort {
ret := _m.Called()
var r0 []*dto.MappedPort
if rf, ok := ret.Get(0).(func() []*dto.MappedPort); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]*dto.MappedPort)
}
}
return r0
}
// Pop provides a mock function with given fields: id
func (_m *RunnerMock) Pop(id ExecutionID) (*dto.ExecutionRequest, bool) {
ret := _m.Called(id)
var r0 *dto.ExecutionRequest
if rf, ok := ret.Get(0).(func(ExecutionID) *dto.ExecutionRequest); ok {
r0 = rf(id)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*dto.ExecutionRequest)
}
}
var r1 bool
if rf, ok := ret.Get(1).(func(ExecutionID) bool); ok {
r1 = rf(id)
} else {
r1 = ret.Get(1).(bool)
}
return r0, r1
}
// ResetTimeout provides a mock function with given fields:
func (_m *RunnerMock) ResetTimeout() {
_m.Called()
}
// SetupTimeout provides a mock function with given fields: duration
func (_m *RunnerMock) SetupTimeout(duration time.Duration) {
_m.Called(duration)
}
// StopTimeout provides a mock function with given fields:
func (_m *RunnerMock) StopTimeout() {
_m.Called()
}
// TimeoutPassed provides a mock function with given fields:
func (_m *RunnerMock) TimeoutPassed() bool {
ret := _m.Called()
var r0 bool
if rf, ok := ret.Get(0).(func() bool); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(bool)
}
return r0
}
// UpdateFileSystem provides a mock function with given fields: request
func (_m *RunnerMock) UpdateFileSystem(request *dto.UpdateFileSystemRequest) error {
ret := _m.Called(request)
var r0 error
if rf, ok := ret.Get(0).(func(*dto.UpdateFileSystemRequest) error); ok {
r0 = rf(request)
} else {
r0 = ret.Error(0)
}
return r0
}

View File

@ -0,0 +1,401 @@
package runner
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"fmt"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"io"
"regexp"
"strings"
"testing"
"time"
)
func TestIdIsStored(t *testing.T) {
runner := NewNomadJob(tests.DefaultJobID, nil, nil, nil)
assert.Equal(t, tests.DefaultJobID, runner.ID())
}
func TestMappedPortsAreStoredCorrectly(t *testing.T) {
runner := NewNomadJob(tests.DefaultJobID, tests.DefaultPortMappings, nil, nil)
assert.Equal(t, tests.DefaultMappedPorts, runner.MappedPorts())
runner = NewNomadJob(tests.DefaultJobID, nil, nil, nil)
assert.Empty(t, runner.MappedPorts())
}
func TestMarshalRunner(t *testing.T) {
runner := NewNomadJob(tests.DefaultJobID, nil, nil, nil)
marshal, err := json.Marshal(runner)
assert.NoError(t, err)
assert.Equal(t, "{\"runnerId\":\""+tests.DefaultJobID+"\"}", string(marshal))
}
func TestExecutionRequestIsStored(t *testing.T) {
runner := NewNomadJob(tests.DefaultJobID, nil, nil, nil)
executionRequest := &dto.ExecutionRequest{
Command: "command",
TimeLimit: 10,
Environment: nil,
}
id := ExecutionID("test-execution")
runner.Add(id, executionRequest)
storedExecutionRunner, ok := runner.Pop(id)
assert.True(t, ok, "Getting an execution should not return ok false")
assert.Equal(t, executionRequest, storedExecutionRunner)
}
func TestNewContextReturnsNewContextWithRunner(t *testing.T) {
runner := NewNomadJob(tests.DefaultRunnerID, nil, nil, nil)
ctx := context.Background()
newCtx := NewContext(ctx, runner)
storedRunner, ok := newCtx.Value(runnerContextKey).(Runner)
require.True(t, ok)
assert.NotEqual(t, ctx, newCtx)
assert.Equal(t, runner, storedRunner)
}
func TestFromContextReturnsRunner(t *testing.T) {
runner := NewNomadJob(tests.DefaultRunnerID, nil, nil, nil)
ctx := NewContext(context.Background(), runner)
storedRunner, ok := FromContext(ctx)
assert.True(t, ok)
assert.Equal(t, runner, storedRunner)
}
func TestFromContextReturnsIsNotOkWhenContextHasNoRunner(t *testing.T) {
ctx := context.Background()
_, ok := FromContext(ctx)
assert.False(t, ok)
}
func TestExecuteInteractivelyTestSuite(t *testing.T) {
suite.Run(t, new(ExecuteInteractivelyTestSuite))
}
type ExecuteInteractivelyTestSuite struct {
suite.Suite
runner *NomadJob
apiMock *nomad.ExecutorAPIMock
timer *InactivityTimerMock
mockedExecuteCommandCall *mock.Call
mockedTimeoutPassedCall *mock.Call
}
func (s *ExecuteInteractivelyTestSuite) SetupTest() {
s.apiMock = &nomad.ExecutorAPIMock{}
s.mockedExecuteCommandCall = s.apiMock.
On("ExecuteCommand", mock.Anything, mock.Anything, mock.Anything, true, mock.Anything, mock.Anything, mock.Anything).
Return(0, nil)
s.timer = &InactivityTimerMock{}
s.timer.On("ResetTimeout").Return()
s.mockedTimeoutPassedCall = s.timer.On("TimeoutPassed").Return(false)
s.runner = &NomadJob{
ExecutionStorage: NewLocalExecutionStorage(),
InactivityTimer: s.timer,
id: tests.DefaultRunnerID,
api: s.apiMock,
}
}
func (s *ExecuteInteractivelyTestSuite) TestCallsApi() {
request := &dto.ExecutionRequest{Command: "echo 'Hello World!'"}
s.runner.ExecuteInteractively(request, nil, nil, nil)
time.Sleep(tests.ShortTimeout)
s.apiMock.AssertCalled(s.T(), "ExecuteCommand", tests.DefaultRunnerID, mock.Anything, request.FullCommand(),
true, mock.Anything, mock.Anything, mock.Anything)
}
func (s *ExecuteInteractivelyTestSuite) TestReturnsAfterTimeout() {
s.mockedExecuteCommandCall.Run(func(args mock.Arguments) {
ctx, ok := args.Get(1).(context.Context)
s.Require().True(ok)
<-ctx.Done()
}).
Return(0, nil)
timeLimit := 1
execution := &dto.ExecutionRequest{TimeLimit: timeLimit}
exit, _ := s.runner.ExecuteInteractively(execution, nil, nil, nil)
select {
case <-exit:
s.FailNow("ExecuteInteractively should not terminate instantly")
case <-time.After(tests.ShortTimeout):
}
select {
case <-time.After(time.Duration(timeLimit) * time.Second):
s.FailNow("ExecuteInteractively should return after the time limit")
case exitInfo := <-exit:
s.Equal(uint8(0), exitInfo.Code)
}
}
func (s *ExecuteInteractivelyTestSuite) TestResetTimerGetsCalled() {
execution := &dto.ExecutionRequest{}
s.runner.ExecuteInteractively(execution, nil, nil, nil)
s.timer.AssertCalled(s.T(), "ResetTimeout")
}
func (s *ExecuteInteractivelyTestSuite) TestExitHasTimeoutErrorIfExecutionTimesOut() {
s.mockedTimeoutPassedCall.Return(true)
execution := &dto.ExecutionRequest{}
exitChannel, _ := s.runner.ExecuteInteractively(execution, nil, nil, nil)
exit := <-exitChannel
s.Equal(ErrorRunnerInactivityTimeout, exit.Err)
}
func TestUpdateFileSystemTestSuite(t *testing.T) {
suite.Run(t, new(UpdateFileSystemTestSuite))
}
type UpdateFileSystemTestSuite struct {
suite.Suite
runner *NomadJob
timer *InactivityTimerMock
apiMock *nomad.ExecutorAPIMock
mockedExecuteCommandCall *mock.Call
command []string
stdin *bytes.Buffer
}
func (s *UpdateFileSystemTestSuite) SetupTest() {
s.apiMock = &nomad.ExecutorAPIMock{}
s.timer = &InactivityTimerMock{}
s.timer.On("ResetTimeout").Return()
s.timer.On("TimeoutPassed").Return(false)
s.runner = &NomadJob{
ExecutionStorage: NewLocalExecutionStorage(),
InactivityTimer: s.timer,
id: tests.DefaultRunnerID,
api: s.apiMock,
}
s.mockedExecuteCommandCall = s.apiMock.On("ExecuteCommand", tests.DefaultRunnerID, mock.Anything,
mock.Anything, false, mock.Anything, mock.Anything, mock.Anything).
Run(func(args mock.Arguments) {
var ok bool
s.command, ok = args.Get(2).([]string)
s.Require().True(ok)
s.stdin, ok = args.Get(4).(*bytes.Buffer)
s.Require().True(ok)
}).Return(0, nil)
}
func (s *UpdateFileSystemTestSuite) TestUpdateFileSystemForRunnerPerformsTarExtractionWithAbsoluteNamesOnRunner() {
// Note: this tests an implementation detail of the UpdateFileSystem method.
// If the implementation changes, delete this test and write a new one.
copyRequest := &dto.UpdateFileSystemRequest{}
err := s.runner.UpdateFileSystem(copyRequest)
s.NoError(err)
s.apiMock.AssertCalled(s.T(), "ExecuteCommand", mock.Anything, mock.Anything, mock.Anything,
false, mock.Anything, mock.Anything, mock.Anything)
s.Regexp("tar --extract --absolute-names", s.command)
}
func (s *UpdateFileSystemTestSuite) TestUpdateFileSystemForRunnerReturnsErrorIfExitCodeIsNotZero() {
s.mockedExecuteCommandCall.Return(1, nil)
copyRequest := &dto.UpdateFileSystemRequest{}
err := s.runner.UpdateFileSystem(copyRequest)
s.ErrorIs(err, ErrorFileCopyFailed)
}
func (s *UpdateFileSystemTestSuite) TestUpdateFileSystemForRunnerReturnsErrorIfApiCallDid() {
s.mockedExecuteCommandCall.Return(0, tests.ErrDefault)
copyRequest := &dto.UpdateFileSystemRequest{}
err := s.runner.UpdateFileSystem(copyRequest)
s.ErrorIs(err, nomad.ErrorExecutorCommunicationFailed)
}
func (s *UpdateFileSystemTestSuite) TestFilesToCopyAreIncludedInTarArchive() {
copyRequest := &dto.UpdateFileSystemRequest{Copy: []dto.File{
{Path: tests.DefaultFileName, Content: []byte(tests.DefaultFileContent)}}}
err := s.runner.UpdateFileSystem(copyRequest)
s.NoError(err)
s.apiMock.AssertCalled(s.T(), "ExecuteCommand", mock.Anything, mock.Anything, mock.Anything, false,
mock.Anything, mock.Anything, mock.Anything)
tarFiles := s.readFilesFromTarArchive(s.stdin)
s.Len(tarFiles, 1)
tarFile := tarFiles[0]
s.True(strings.HasSuffix(tarFile.Name, tests.DefaultFileName))
s.Equal(byte(tar.TypeReg), tarFile.TypeFlag)
s.Equal(tests.DefaultFileContent, tarFile.Content)
}
func (s *UpdateFileSystemTestSuite) TestTarFilesContainCorrectPathForRelativeFilePath() {
copyRequest := &dto.UpdateFileSystemRequest{Copy: []dto.File{
{Path: tests.DefaultFileName, Content: []byte(tests.DefaultFileContent)}}}
err := s.runner.UpdateFileSystem(copyRequest)
s.Require().NoError(err)
tarFiles := s.readFilesFromTarArchive(s.stdin)
s.Len(tarFiles, 1)
// tar is extracted in the active workdir of the container, file will be put relative to that
s.Equal(tests.DefaultFileName, tarFiles[0].Name)
}
func (s *UpdateFileSystemTestSuite) TestFilesWithAbsolutePathArePutInAbsoluteLocation() {
copyRequest := &dto.UpdateFileSystemRequest{Copy: []dto.File{
{Path: tests.FileNameWithAbsolutePath, Content: []byte(tests.DefaultFileContent)}}}
err := s.runner.UpdateFileSystem(copyRequest)
s.Require().NoError(err)
tarFiles := s.readFilesFromTarArchive(s.stdin)
s.Len(tarFiles, 1)
s.Equal(tarFiles[0].Name, tests.FileNameWithAbsolutePath)
}
func (s *UpdateFileSystemTestSuite) TestDirectoriesAreMarkedAsDirectoryInTar() {
copyRequest := &dto.UpdateFileSystemRequest{Copy: []dto.File{{Path: tests.DefaultDirectoryName, Content: []byte{}}}}
err := s.runner.UpdateFileSystem(copyRequest)
s.Require().NoError(err)
tarFiles := s.readFilesFromTarArchive(s.stdin)
s.Len(tarFiles, 1)
tarFile := tarFiles[0]
s.True(strings.HasSuffix(tarFile.Name+"/", tests.DefaultDirectoryName))
s.Equal(byte(tar.TypeDir), tarFile.TypeFlag)
s.Equal("", tarFile.Content)
}
func (s *UpdateFileSystemTestSuite) TestFilesToRemoveGetRemoved() {
copyRequest := &dto.UpdateFileSystemRequest{Delete: []dto.FilePath{tests.DefaultFileName}}
err := s.runner.UpdateFileSystem(copyRequest)
s.NoError(err)
s.apiMock.AssertCalled(s.T(), "ExecuteCommand", mock.Anything, mock.Anything, mock.Anything, false,
mock.Anything, mock.Anything, mock.Anything)
s.Regexp(fmt.Sprintf("rm[^;]+%s' *;", regexp.QuoteMeta(tests.DefaultFileName)), s.command)
}
func (s *UpdateFileSystemTestSuite) TestFilesToRemoveGetEscaped() {
copyRequest := &dto.UpdateFileSystemRequest{Delete: []dto.FilePath{"/some/potentially/harmful'filename"}}
err := s.runner.UpdateFileSystem(copyRequest)
s.NoError(err)
s.apiMock.AssertCalled(s.T(), "ExecuteCommand", mock.Anything, mock.Anything, mock.Anything, false,
mock.Anything, mock.Anything, mock.Anything)
s.Contains(strings.Join(s.command, " "), "'/some/potentially/harmful'\\''filename'")
}
func (s *UpdateFileSystemTestSuite) TestResetTimerGetsCalled() {
copyRequest := &dto.UpdateFileSystemRequest{}
err := s.runner.UpdateFileSystem(copyRequest)
s.NoError(err)
s.timer.AssertCalled(s.T(), "ResetTimeout")
}
type TarFile struct {
Name string
Content string
TypeFlag byte
}
func (s *UpdateFileSystemTestSuite) readFilesFromTarArchive(tarArchive io.Reader) (files []TarFile) {
reader := tar.NewReader(tarArchive)
for {
hdr, err := reader.Next()
if err != nil {
break
}
bf, err := io.ReadAll(reader)
s.Require().NoError(err)
files = append(files, TarFile{Name: hdr.Name, Content: string(bf), TypeFlag: hdr.Typeflag})
}
return files
}
func TestInactivityTimerTestSuite(t *testing.T) {
suite.Run(t, new(InactivityTimerTestSuite))
}
type InactivityTimerTestSuite struct {
suite.Suite
runner Runner
manager *ManagerMock
returned chan bool
}
func (s *InactivityTimerTestSuite) SetupTest() {
s.returned = make(chan bool, 1)
s.manager = &ManagerMock{}
s.manager.On("Return", mock.Anything).Run(func(_ mock.Arguments) {
s.returned <- true
}).Return(nil)
s.runner = NewRunner(tests.DefaultRunnerID, s.manager)
s.runner.SetupTimeout(tests.ShortTimeout)
}
func (s *InactivityTimerTestSuite) TearDownTest() {
s.runner.StopTimeout()
}
func (s *InactivityTimerTestSuite) TestRunnerIsReturnedAfterTimeout() {
s.True(tests.ChannelReceivesSomething(s.returned, 2*tests.ShortTimeout))
}
func (s *InactivityTimerTestSuite) TestRunnerIsNotReturnedBeforeTimeout() {
s.False(tests.ChannelReceivesSomething(s.returned, tests.ShortTimeout/2))
}
func (s *InactivityTimerTestSuite) TestResetTimeoutExtendsTheDeadline() {
time.Sleep(3 * tests.ShortTimeout / 4)
s.runner.ResetTimeout()
s.False(tests.ChannelReceivesSomething(s.returned, 3*tests.ShortTimeout/4),
"Because of the reset, the timeout should not be reached by now.")
s.True(tests.ChannelReceivesSomething(s.returned, 5*tests.ShortTimeout/4),
"After reset, the timout should be reached by now.")
}
func (s *InactivityTimerTestSuite) TestStopTimeoutStopsTimeout() {
s.runner.StopTimeout()
s.False(tests.ChannelReceivesSomething(s.returned, 2*tests.ShortTimeout))
}
func (s *InactivityTimerTestSuite) TestTimeoutPassedReturnsFalseBeforeDeadline() {
s.False(s.runner.TimeoutPassed())
}
func (s *InactivityTimerTestSuite) TestTimeoutPassedReturnsTrueAfterDeadline() {
time.Sleep(2 * tests.ShortTimeout)
s.True(s.runner.TimeoutPassed())
}
func (s *InactivityTimerTestSuite) TestTimerIsNotResetAfterDeadline() {
time.Sleep(2 * tests.ShortTimeout)
// We need to empty the returned channel so Return can send to it again.
tests.ChannelReceivesSomething(s.returned, 0)
s.runner.ResetTimeout()
s.False(tests.ChannelReceivesSomething(s.returned, 2*tests.ShortTimeout))
}
func (s *InactivityTimerTestSuite) TestSetupTimeoutStopsOldTimeout() {
s.runner.SetupTimeout(3 * tests.ShortTimeout)
s.False(tests.ChannelReceivesSomething(s.returned, 2*tests.ShortTimeout))
s.True(tests.ChannelReceivesSomething(s.returned, 2*tests.ShortTimeout))
}
func (s *InactivityTimerTestSuite) TestTimerIsInactiveWhenDurationIsZero() {
s.runner.SetupTimeout(0)
s.False(tests.ChannelReceivesSomething(s.returned, tests.ShortTimeout))
}
// NewRunner creates a new runner with the provided id and manager.
func NewRunner(id string, manager Manager) Runner {
return NewNomadJob(id, nil, nil, manager)
}

View File

@ -0,0 +1,77 @@
package runner
import (
"sync"
)
// Storage is an interface for storing runners.
type Storage interface {
// Add adds a runner to the storage.
// It overwrites the old runner if one with the same id was already stored.
Add(Runner)
// Get returns a runner from the storage.
// Iff the runner does not exist in the storage, ok will be false.
Get(id string) (r Runner, ok bool)
// Delete deletes the runner with the passed id from the storage.
// It does nothing if no runner with the id is present in the store.
Delete(id string)
// Length returns the number of currently stored runners in the storage.
Length() int
// Sample returns and removes an arbitrary runner from the storage.
// ok is true iff a runner was returned.
Sample() (r Runner, ok bool)
}
// localRunnerStorage stores runner objects in the local application memory.
// TODO: Create an implementation that uses some persistent storage like a database.
type localRunnerStorage struct {
sync.RWMutex
runners map[string]Runner
}
// NewLocalRunnerStorage returns a Storage implementation.
// This implementation stores the data in a thread-safe manner in the local application memory.
func NewLocalRunnerStorage() *localRunnerStorage {
return &localRunnerStorage{
runners: make(map[string]Runner),
}
}
func (s *localRunnerStorage) Add(r Runner) {
s.Lock()
defer s.Unlock()
s.runners[r.ID()] = r
}
func (s *localRunnerStorage) Get(id string) (r Runner, ok bool) {
s.RLock()
defer s.RUnlock()
r, ok = s.runners[id]
return
}
func (s *localRunnerStorage) Delete(id string) {
s.Lock()
defer s.Unlock()
delete(s.runners, id)
}
func (s *localRunnerStorage) Sample() (Runner, bool) {
s.Lock()
defer s.Unlock()
for _, runner := range s.runners {
delete(s.runners, runner.ID())
return runner, true
}
return nil, false
}
func (s *localRunnerStorage) Length() int {
s.RLock()
defer s.RUnlock()
return len(s.runners)
}
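// --- Illustrative usage sketch (not part of this commit) ---
// Sample both returns and removes a runner, which is what the manager relies on
// when claiming idle runners. This hypothetical snippet illustrates that
// behaviour; r is assumed to be an existing Runner.
func exampleSampleRemoves(r Runner) {
pool := NewLocalRunnerStorage()
pool.Add(r)
if sampled, ok := pool.Sample(); ok {
// After sampling, the runner is no longer retrievable from the pool.
_, stillThere := pool.Get(sampled.ID())
_ = stillThere // false
}
}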

View File

@ -0,0 +1,103 @@
package runner
import (
"github.com/stretchr/testify/suite"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/tests"
"testing"
)
func TestRunnerPoolTestSuite(t *testing.T) {
suite.Run(t, new(RunnerPoolTestSuite))
}
type RunnerPoolTestSuite struct {
suite.Suite
runnerStorage *localRunnerStorage
runner Runner
}
func (s *RunnerPoolTestSuite) SetupTest() {
s.runnerStorage = NewLocalRunnerStorage()
s.runner = NewRunner(tests.DefaultRunnerID, nil)
s.runner.Add(tests.DefaultExecutionID, &dto.ExecutionRequest{Command: "true"})
}
func (s *RunnerPoolTestSuite) TestAddedRunnerCanBeRetrieved() {
s.runnerStorage.Add(s.runner)
retrievedRunner, ok := s.runnerStorage.Get(s.runner.ID())
s.True(ok, "A saved runner should be retrievable")
s.Equal(s.runner, retrievedRunner)
}
func (s *RunnerPoolTestSuite) TestRunnerWithSameIdOverwritesOldOne() {
otherRunnerWithSameID := NewRunner(s.runner.ID(), nil)
// ensure the runner is actually different
s.NotEqual(s.runner, otherRunnerWithSameID)
s.runnerStorage.Add(s.runner)
s.runnerStorage.Add(otherRunnerWithSameID)
retrievedRunner, _ := s.runnerStorage.Get(s.runner.ID())
s.NotEqual(s.runner, retrievedRunner)
s.Equal(otherRunnerWithSameID, retrievedRunner)
}
func (s *RunnerPoolTestSuite) TestDeletedRunnersAreNotAccessible() {
s.runnerStorage.Add(s.runner)
s.runnerStorage.Delete(s.runner.ID())
retrievedRunner, ok := s.runnerStorage.Get(s.runner.ID())
s.Nil(retrievedRunner)
s.False(ok, "A deleted runner should not be accessible")
}
func (s *RunnerPoolTestSuite) TestSampleReturnsRunnerWhenOneIsAvailable() {
s.runnerStorage.Add(s.runner)
sampledRunner, ok := s.runnerStorage.Sample()
s.NotNil(sampledRunner)
s.True(ok)
}
func (s *RunnerPoolTestSuite) TestSampleReturnsFalseWhenNoneIsAvailable() {
sampledRunner, ok := s.runnerStorage.Sample()
s.Nil(sampledRunner)
s.False(ok)
}
func (s *RunnerPoolTestSuite) TestSampleRemovesRunnerFromPool() {
s.runnerStorage.Add(s.runner)
sampledRunner, _ := s.runnerStorage.Sample()
_, ok := s.runnerStorage.Get(sampledRunner.ID())
s.False(ok)
}
func (s *RunnerPoolTestSuite) TestLenOfEmptyPoolIsZero() {
s.Equal(0, s.runnerStorage.Length())
}
func (s *RunnerPoolTestSuite) TestLenChangesOnStoreContentChange() {
s.Run("len increases when runner is added", func() {
s.runnerStorage.Add(s.runner)
s.Equal(1, s.runnerStorage.Length())
})
s.Run("len does not increase when runner with same id is added", func() {
s.runnerStorage.Add(s.runner)
s.Equal(1, s.runnerStorage.Length())
})
s.Run("len increases again when different runner is added", func() {
anotherRunner := NewRunner(tests.AnotherRunnerID, nil)
s.runnerStorage.Add(anotherRunner)
s.Equal(2, s.runnerStorage.Length())
})
s.Run("len decreases when runner is deleted", func() {
s.runnerStorage.Delete(s.runner.ID())
s.Equal(1, s.runnerStorage.Length())
})
s.Run("len decreases when runner is sampled", func() {
_, _ = s.runnerStorage.Sample()
s.Equal(0, s.runnerStorage.Length())
})
}