Implement routes to list, get and delete execution environments

* #9 Implement routes to list, get and delete execution environments.
A refactoring was required to introduce the ExecutionEnvironment interface.

* Fix MR comments, linting issues and bug that led to e2e test failure

* Add e2e tests

* Add unit tests
This commit is contained in:
Maximilian Paß
2021-10-21 10:33:52 +02:00
committed by GitHub
parent 71cf21abce
commit 34d4bb7ea0
31 changed files with 2239 additions and 1065 deletions

View File

@@ -3,15 +3,12 @@ package environment
import (
_ "embed"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/jobspec2"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/openHPI/poseidon/internal/nomad"
"github.com/openHPI/poseidon/internal/runner"
"github.com/openHPI/poseidon/pkg/dto"
"github.com/openHPI/poseidon/pkg/logging"
"os"
"strconv"
)
// templateEnvironmentJobHCL holds our default job in HCL format.
@@ -28,13 +25,31 @@ type Manager interface {
// It should be called during the startup process (e.g. on creation of the Manager).
Load() error
// List returns all environments known by Poseidon.
// When `fetch` is set the environments are fetched from the executor before returning.
List(fetch bool) ([]runner.ExecutionEnvironment, error)
// Get returns the details of the requested environment.
// When `fetch` is set the requested environment is fetched from the executor before returning.
Get(id dto.EnvironmentID, fetch bool) (runner.ExecutionEnvironment, error)
// CreateOrUpdate creates/updates an execution environment on the executor.
// If the job was created, the returned boolean is true, if it was updated, it is false.
// If err is not nil, that means the environment was neither created nor updated.
CreateOrUpdate(
id runner.EnvironmentID,
id dto.EnvironmentID,
request dto.ExecutionEnvironmentRequest,
) (bool, error)
// Delete removes the specified execution environment.
// Iff the specified environment could not be found Delete returns false.
Delete(id dto.EnvironmentID) (bool, error)
}
// NomadEnvironmentManager bundles the dependencies its methods use to manage execution environments.
// NewNomadEnvironmentManager fills the fields positionally, so their order must not change.
type NomadEnvironmentManager struct {
// runnerManager is where the methods of this type look up, set, list and delete environments.
runnerManager runner.Manager
// api is the executor API client; it loads the environment jobs and is passed to the
// Register, UpdateRunnerSpecs, Scale and Delete calls on an environment.
api nomad.ExecutorAPI
// templateEnvironmentHCL is the job template in HCL format that CreateOrUpdate passes to
// NewNomadEnvironment when the environment is not yet known.
templateEnvironmentHCL string
}
func NewNomadEnvironmentManager(
@@ -45,11 +60,8 @@ func NewNomadEnvironmentManager(
if err := loadTemplateEnvironmentJobHCL(templateJobFile); err != nil {
return nil, err
}
templateEnvironmentJob, err := parseJob(templateEnvironmentJobHCL)
if err != nil {
return nil, err
}
m := &NomadEnvironmentManager{runnerManager, apiClient, *templateEnvironmentJob}
m := &NomadEnvironmentManager{runnerManager, apiClient, templateEnvironmentJobHCL}
if err := m.Load(); err != nil {
log.WithError(err).Error("Error recovering the execution environments")
}
@@ -57,6 +69,121 @@ func NewNomadEnvironmentManager(
return m, nil
}
// Get returns the details of the requested environment.
//
// The environment is first looked up in the runner manager. When fetch is set, it is additionally
// requested from the executor and the local state is reconciled with the result:
//   - the executor does not list the environment: it is deleted via Delete and reported as unknown,
//   - only the executor lists the environment: it is set in the runner manager and returned,
//   - both know the environment: SetConfigFrom is called on the local one with the fetched one.
//
// If the environment is unknown in the end, runner.ErrUnknownExecutionEnvironment is returned together
// with a nil environment. Errors of fetching or deleting are returned unchanged, also with a nil
// environment.
func (m *NomadEnvironmentManager) Get(id dto.EnvironmentID, fetch bool) (
	executionEnvironment runner.ExecutionEnvironment, err error) {
	executionEnvironment, ok := m.runnerManager.GetEnvironment(id)

	if fetch {
		// Declare the fetched environment separately so that the named result err is assigned
		// instead of being shadowed by a short variable declaration.
		var fetchedEnvironment runner.ExecutionEnvironment
		fetchedEnvironment, err = fetchEnvironment(id, m.api)
		switch {
		case err != nil:
			return nil, err
		case fetchedEnvironment == nil:
			// The executor no longer lists the environment, so the local copy is removed as well.
			if _, err = m.Delete(id); err != nil {
				return nil, err
			}
			ok = false
		case !ok:
			m.runnerManager.SetEnvironment(fetchedEnvironment)
			executionEnvironment = fetchedEnvironment
			ok = true
		default:
			executionEnvironment.SetConfigFrom(fetchedEnvironment)
		}
	}

	if !ok {
		// Never hand out a (possibly just deleted) environment together with the error.
		return nil, runner.ErrUnknownExecutionEnvironment
	}
	return executionEnvironment, nil
}
// List returns the environments that the runner manager lists.
// When fetch is set, Load is run before the list is read; an error of Load is returned
// unchanged and no list is returned in that case.
func (m *NomadEnvironmentManager) List(fetch bool) ([]runner.ExecutionEnvironment, error) {
	if !fetch {
		return m.runnerManager.ListEnvironments(), nil
	}

	if loadErr := m.Load(); loadErr != nil {
		return nil, loadErr
	}
	return m.runnerManager.ListEnvironments(), nil
}
// CreateOrUpdate applies the request to the environment with the given id and pushes it to the executor.
//
// An environment unknown to the runner manager is created from the template HCL and gets the id
// assigned. The request values are then set on the environment, it is set in the runner manager
// and afterwards registered, its runner specs are updated and it is scaled, in that order.
//
// created is the value returned by the runner manager when setting the environment. On any error,
// false and the wrapped error are returned.
func (m *NomadEnvironmentManager) CreateOrUpdate(id dto.EnvironmentID, request dto.ExecutionEnvironmentRequest) (
	created bool, err error) {
	environment, known := m.runnerManager.GetEnvironment(id)
	if !known {
		environment, err = NewNomadEnvironment(m.templateEnvironmentHCL)
		if err != nil {
			return false, fmt.Errorf("error creating Nomad environment: %w", err)
		}
		environment.SetID(id)
	}

	// Take over the requested configuration.
	environment.SetPrewarmingPoolSize(request.PrewarmingPoolSize)
	environment.SetCPULimit(request.CPULimit)
	environment.SetMemoryLimit(request.MemoryLimit)
	environment.SetImage(request.Image)
	environment.SetNetworkAccess(request.NetworkAccess, request.ExposedPorts)

	// The environment is set in the runner manager before it is sent to the executor.
	created = m.runnerManager.SetEnvironment(environment)

	if err = environment.Register(m.api); err != nil {
		return false, fmt.Errorf("error registering template job in API: %w", err)
	}
	if err = environment.UpdateRunnerSpecs(m.api); err != nil {
		return false, fmt.Errorf("error updating runner jobs in API: %w", err)
	}
	if err = environment.Scale(m.api); err != nil {
		return false, fmt.Errorf("error scaling template job in API: %w", err)
	}
	return created, nil
}
// Delete removes the environment with the given id.
// It returns false without an error when the runner manager does not know the environment.
// Otherwise the environment is deleted from the runner manager first and then Delete is called
// on it with the executor API; true is returned even if that call fails, together with the wrapped error.
func (m *NomadEnvironmentManager) Delete(id dto.EnvironmentID) (bool, error) {
	target, found := m.runnerManager.GetEnvironment(id)
	if !found {
		return false, nil
	}

	m.runnerManager.DeleteEnvironment(id)
	if deleteErr := target.Delete(m.api); deleteErr != nil {
		return true, fmt.Errorf("could not delete environment: %w", deleteErr)
	}
	return true, nil
}
// Load recovers the environments from the environment jobs loaded via the executor API.
// Jobs that are not running or for which no config task group is returned are logged and skipped.
// Every other job is wrapped in a NomadEnvironment with a fresh local runner storage and set in
// the runner manager. Only a failure of loading the jobs is returned as error.
func (m *NomadEnvironmentManager) Load() error {
	templateJobs, err := m.api.LoadEnvironmentJobs()
	if err != nil {
		return fmt.Errorf("couldn't load template jobs: %w", err)
	}

	for _, job := range templateJobs {
		jobLogger := log.WithField("jobID", *job.ID)

		// The cases are evaluated top-down, so the config task group is only looked up for running jobs.
		switch {
		case *job.Status != structs.JobStatusRunning:
			jobLogger.Info("Job not running, skipping ...")
		case nomad.FindOrCreateConfigTaskGroup(job) == nil:
			jobLogger.Info("Couldn't find config task group in job, skipping ...")
		default:
			m.runnerManager.SetEnvironment(&NomadEnvironment{
				jobHCL:      templateEnvironmentJobHCL,
				job:         job,
				idleRunners: runner.NewLocalRunnerStorage(),
			})
			jobLogger.Info("Successfully recovered environment")
		}
	}
	return nil
}
// loadTemplateEnvironmentJobHCL loads the template environment job HCL from the given path.
// If the path is empty, the embedded default file is used.
func loadTemplateEnvironmentJobHCL(path string) error {
@@ -71,84 +198,25 @@ func loadTemplateEnvironmentJobHCL(path string) error {
return nil
}
// NomadEnvironmentManager holds what CreateOrUpdate and Load need to reach the runner manager and the executor.
type NomadEnvironmentManager struct {
// runnerManager receives created or recovered environments via CreateOrUpdateEnvironment.
runnerManager runner.Manager
// api is the executor API used to register template jobs and to load the environment jobs.
api nomad.ExecutorAPI
// templateEnvironmentJob is the template job whose address CreateOrUpdate passes to RegisterTemplateJob.
templateEnvironmentJob nomadApi.Job
}
func (m *NomadEnvironmentManager) CreateOrUpdate(
id runner.EnvironmentID,
request dto.ExecutionEnvironmentRequest,
) (bool, error) {
templateJob, err := m.api.RegisterTemplateJob(&m.templateEnvironmentJob, runner.TemplateJobID(id),
request.PrewarmingPoolSize, request.CPULimit, request.MemoryLimit,
request.Image, request.NetworkAccess, request.ExposedPorts)
func fetchEnvironment(id dto.EnvironmentID, apiClient nomad.ExecutorAPI) (runner.ExecutionEnvironment, error) {
environments, err := apiClient.LoadEnvironmentJobs()
if err != nil {
return false, fmt.Errorf("error registering template job in API: %w", err)
return nil, fmt.Errorf("error fetching the environment jobs: %w", err)
}
created, err := m.runnerManager.CreateOrUpdateEnvironment(id, request.PrewarmingPoolSize, templateJob, true)
if err != nil {
return created, fmt.Errorf("error updating environment in runner manager: %w", err)
var fetchedEnvironment runner.ExecutionEnvironment
for _, job := range environments {
environmentID, err := nomad.EnvironmentIDFromTemplateJobID(*job.ID)
if err != nil {
log.WithError(err).Warn("Cannot parse environment id of loaded environment")
continue
}
if id == environmentID {
fetchedEnvironment = &NomadEnvironment{
jobHCL: templateEnvironmentJobHCL,
job: job,
idleRunners: runner.NewLocalRunnerStorage(),
}
}
}
return created, nil
}
// Load recovers the execution environments from the environment jobs loaded via the executor API.
//
// A job is logged and skipped when it is not running, when no config task group is found, when its
// pool size meta value is not an integer, when no valid environment id can be derived from its job
// id, or when the runner manager rejects it. Only a failure of loading the jobs is returned as error.
func (m *NomadEnvironmentManager) Load() error {
	templateJobs, err := m.api.LoadEnvironmentJobs()
	if err != nil {
		return fmt.Errorf("couldn't load template jobs: %w", err)
	}

	for _, job := range templateJobs {
		jobLogger := log.WithField("jobID", *job.ID)
		if *job.Status != structs.JobStatusRunning {
			jobLogger.Info("Job not running, skipping ...")
			continue
		}
		configTaskGroup := nomad.FindConfigTaskGroup(job)
		if configTaskGroup == nil {
			jobLogger.Info("Couldn't find config task group in job, skipping ...")
			continue
		}
		desiredIdleRunnersCount, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaPoolSizeKey])
		if err != nil {
			jobLogger.Infof("Couldn't convert pool size to int: %v, skipping ...", err)
			continue
		}
		environmentIDString, err := runner.EnvironmentIDFromTemplateJobID(*job.ID)
		if err != nil {
			jobLogger.WithError(err).Error("Couldn't retrieve environment id from template job")
			// Without an id string the job cannot be recovered; skip it like every other
			// failure in this loop instead of carrying on with an invalid value.
			continue
		}
		environmentID, err := runner.NewEnvironmentID(environmentIDString)
		if err != nil {
			jobLogger.WithField("environmentID", environmentIDString).
				WithError(err).
				Error("Couldn't retrieve environmentID from string")
			continue
		}
		// The final false argument mirrors the original call; CreateOrUpdate passes true here instead.
		_, err = m.runnerManager.CreateOrUpdateEnvironment(environmentID, uint(desiredIdleRunnersCount), job, false)
		if err != nil {
			jobLogger.WithError(err).Info("Could not recover job.")
			continue
		}
		jobLogger.Info("Successfully recovered environment")
	}
	return nil
}
func parseJob(jobHCL string) (*nomadApi.Job, error) {
config := jobspec2.ParseConfig{
Body: []byte(jobHCL),
AllowFS: false,
Strict: true,
}
job, err := jobspec2.ParseWithConfig(&config)
if err != nil {
return nil, fmt.Errorf("error parsing Nomad job: %w", err)
}
return job, nil
return fetchedEnvironment, nil
}