Use Nomad jobs as runners instead of allocations

Because we cannot control which allocations are destroyed when downscaling a job, we
now use Nomad jobs as our runners. For each runner we prewarm for an environment, a
corresponding job is therefore created in Nomad. A default job serves as a template
for these runner jobs, and with this template already existing execution environments
can easily be restored once Poseidon is restarted.
sirkrypt0 authored on 2021-06-08 14:42:35 +02:00, committed by Maximilian Paß
parent 8de489929e, commit c7d59810e5
20 changed files with 333 additions and 266 deletions
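
Conceptually, the change turns "add one prewarmed runner" into "register one more copy
of the default job". Below is a minimal, self-contained sketch of that idea against the
HashiCorp Nomad API client; the function names and the job-ID scheme are hypothetical
illustrations, not the project's actual code:

package sketch

import (
	"fmt"

	nomadApi "github.com/hashicorp/nomad/api"
)

// runnerJobID is a hypothetical naming scheme: one Nomad job ID per runner.
func runnerJobID(environmentID string, n int) string {
	return fmt.Sprintf("%s-runner-%d", environmentID, n)
}

// spawnRunners registers one copy of the template job per prewarmed runner.
// Because every runner is its own job, a specific runner can later be
// removed deterministically by deregistering exactly that job.
func spawnRunners(client *nomadApi.Client, template *nomadApi.Job,
	environmentID string, poolSize int) error {
	for i := 0; i < poolSize; i++ {
		job := *template // shallow copy of the embedded template job
		id := runnerJobID(environmentID, i)
		job.ID = &id
		job.Name = &id
		if _, _, err := client.Jobs().Register(&job, nil); err != nil {
			return err
		}
	}
	return nil
}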

@@ -19,15 +19,16 @@ const (
 //go:embed default-job.hcl
 var defaultJobHCL string
 
-// registerJob creates a Nomad job based on the default job configuration and the given parameters.
+// registerDefaultJob creates a Nomad job based on the default job configuration and the given parameters.
 // It registers the job with Nomad and waits until the registration completes.
-func (m *NomadEnvironmentManager) registerJob(
+func (m *NomadEnvironmentManager) registerDefaultJob(
 	id string,
 	prewarmingPoolSize, cpuLimit, memoryLimit uint,
 	image string,
 	networkAccess bool,
 	exposedPorts []uint16) error {
-	job := createJob(m.defaultJob, id, prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
+	// TODO: store prewarming pool size in job meta information
+	job := createJob(m.defaultJob, nomad.DefaultJobID(id), prewarmingPoolSize, cpuLimit, memoryLimit, image, networkAccess, exposedPorts)
 	evalID, err := m.api.RegisterNomadJob(job)
 	if err != nil {
 		return err
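
The new call above also wraps the environment ID in nomad.DefaultJobID before building
the template job; its body is not part of this diff. A plausible reading is a simple
ID-derivation helper, where the "-default" suffix below is an assumption, not taken
from the source:

// DefaultJobID (sketch): derive the Nomad job ID of an environment's
// template job from its environment ID. The suffix is assumed.
func DefaultJobID(environmentID string) string {
	return environmentID + "-default"
}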
@@ -76,7 +77,7 @@ func createTaskGroup(job *nomadApi.Job, name string, prewarmingPoolSize uint) *n
 	} else {
 		taskGroup = job.TaskGroups[0]
 		taskGroup.Name = &name
-		count := int(prewarmingPoolSize)
+		count := 1
 		taskGroup.Count = &count
 	}
 	return taskGroup
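
Pinning each task group to a single allocation is consistent with the commit message:
capacity is now scaled by registering and deregistering whole jobs rather than by
raising or lowering a group's count. Removing one specific idle runner could then look
roughly like the sketch below, reusing the nomadApi client import from the earlier
sketch; Jobs().Deregister is the Nomad API client's actual job-removal call, while
removeRunner itself is hypothetical:

// removeRunner (sketch): with one allocation per job, deleting a specific
// runner means deregistering exactly its job, so Nomad can no longer pick
// an arbitrary, possibly busy allocation as it could when a task group's
// count was lowered.
func removeRunner(client *nomadApi.Client, jobID string) error {
	// purge=true removes the job from Nomad's state entirely.
	_, _, err := client.Jobs().Deregister(jobID, true, nil)
	return err
}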