Add architecture for multiple managers
using the chain of responsibility pattern.
internal/runner/abstract_manager.go (new file, 52 lines)
@@ -0,0 +1,52 @@
package runner

import (
    "errors"
    "github.com/openHPI/poseidon/pkg/dto"
)

var ErrNullObject = errors.New("functionality not available for the null object")

// AbstractManager is used to have a fallback runner manager in the chain of responsibility
// following the null object pattern.
type AbstractManager struct {
    nextHandler AccessorHandler
}

func (n *AbstractManager) SetNextHandler(next AccessorHandler) {
    n.nextHandler = next
}

func (n *AbstractManager) NextHandler() AccessorHandler {
    return n.nextHandler
}

func (n *AbstractManager) ListEnvironments() []ExecutionEnvironment {
    return []ExecutionEnvironment{}
}

func (n *AbstractManager) GetEnvironment(_ dto.EnvironmentID) (ExecutionEnvironment, bool) {
    return nil, false
}

func (n *AbstractManager) StoreEnvironment(_ ExecutionEnvironment) {}

func (n *AbstractManager) DeleteEnvironment(_ dto.EnvironmentID) {}

func (n *AbstractManager) EnvironmentStatistics() map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData {
    return map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{}
}

func (n *AbstractManager) Claim(_ dto.EnvironmentID, _ int) (Runner, error) {
    return nil, ErrNullObject
}

func (n *AbstractManager) Get(_ string) (Runner, error) {
    return nil, ErrNullObject
}

func (n *AbstractManager) Return(_ Runner) error {
    return nil
}

func (n *AbstractManager) Load() {}
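The AbstractManager is the null-object tail of the chain: read operations come back empty, write operations are no-ops, and Claim or Get fail with ErrNullObject instead of panicking on a missing next handler. A minimal sketch of that terminal behaviour (not part of the commit; it assumes the identifiers above and the runner package's existing imports of errors, fmt, and dto):

// demoNullObject is a hypothetical helper inside package runner showing
// what happens when a request reaches the end of the chain.
func demoNullObject() {
    tail := &AbstractManager{}

    envs := tail.ListEnvironments()                // empty slice
    _, err := tail.Claim(dto.EnvironmentID(0), 30) // fails softly
    fmt.Println(len(envs), errors.Is(err, ErrNullObject)) // prints "0 true"
}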
internal/runner/aws_manager.go (new file, 57 lines)
@@ -0,0 +1,57 @@
package runner

import (
    "fmt"
    "github.com/openHPI/poseidon/pkg/dto"
)

type AWSRunnerManager struct {
    *AbstractManager
}

// NewAWSRunnerManager creates a new runner manager that keeps track of all runners at AWS.
func NewAWSRunnerManager() *AWSRunnerManager {
    return &AWSRunnerManager{&AbstractManager{}}
}

func (a AWSRunnerManager) ListEnvironments() []ExecutionEnvironment {
    return []ExecutionEnvironment{}
}

func (a AWSRunnerManager) GetEnvironment(_ dto.EnvironmentID) (ExecutionEnvironment, bool) {
    return nil, false
}

func (a AWSRunnerManager) StoreEnvironment(_ ExecutionEnvironment) {}

func (a AWSRunnerManager) DeleteEnvironment(_ dto.EnvironmentID) {}

func (a AWSRunnerManager) EnvironmentStatistics() map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData {
    return map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData{}
}

func (a AWSRunnerManager) Claim(id dto.EnvironmentID, duration int) (Runner, error) {
    r, err := a.NextHandler().Claim(id, duration)
    if err != nil {
        return nil, fmt.Errorf("aws wrapped: %w", err)
    }
    return r, nil
}

func (a AWSRunnerManager) Get(runnerID string) (Runner, error) {
    r, err := a.NextHandler().Get(runnerID)
    if err != nil {
        return nil, fmt.Errorf("aws wrapped: %w", err)
    }
    return r, nil
}

func (a AWSRunnerManager) Return(r Runner) error {
    err := a.NextHandler().Return(r)
    if err != nil {
        return fmt.Errorf("aws wrapped: %w", err)
    }
    return nil
}

func (a AWSRunnerManager) Load() {}
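With both managers available, the chain can be assembled front to back: the AWS manager handles what it can and forwards everything else to its next handler, whose embedded AbstractManager terminates the chain. A possible wiring is sketched below; the composition in Poseidon's actual startup code may differ, so treat the helper name and arguments as illustrative.

package main

import (
    "context"

    "github.com/openHPI/poseidon/internal/nomad"
    "github.com/openHPI/poseidon/internal/runner"
)

// buildManagerChain is a hypothetical helper: requests first hit the AWS
// manager and fall through to the Nomad manager; the Nomad manager's
// embedded AbstractManager acts as the null-object tail of the chain.
func buildManagerChain(ctx context.Context, apiClient nomad.ExecutorAPI) runner.Manager {
    awsManager := runner.NewAWSRunnerManager()
    nomadManager := runner.NewNomadRunnerManager(apiClient, ctx)
    awsManager.SetNextHandler(nomadManager)
    return awsManager
}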
@@ -37,11 +37,11 @@ type InactivityTimerImplementation struct {
    duration time.Duration
    state    TimerState
    runner   Runner
-   manager  Manager
+   manager  Accessor
    mu       sync.Mutex
}

-func NewInactivityTimer(runner Runner, manager Manager) InactivityTimer {
+func NewInactivityTimer(runner Runner, manager Accessor) InactivityTimer {
    return &InactivityTimerImplementation{
        state:  TimerInactive,
        runner: runner,
@@ -1,24 +1,9 @@
package runner

import (
    "context"
    "encoding/json"
    "errors"
    "fmt"
    nomadApi "github.com/hashicorp/nomad/api"
    "github.com/openHPI/poseidon/internal/nomad"
    "github.com/openHPI/poseidon/pkg/dto"
    "github.com/openHPI/poseidon/pkg/logging"
    "github.com/sirupsen/logrus"
    "strconv"
    "time"
)

var (
    log                            = logging.GetLogger("runner")
    ErrUnknownExecutionEnvironment = errors.New("execution environment not found")
    ErrNoRunnersAvailable          = errors.New("no runners available for this execution environment")
    ErrRunnerNotFound              = errors.New("no runner found with this id")
)

// ExecutionEnvironment are groups of runners that share the configuration stored in the environment.
@@ -67,6 +52,12 @@ type ExecutionEnvironment interface {
// Manager keeps track of the used and unused runners of all execution environments in order to provide unused
// runners to new clients and ensure no runner is used twice.
type Manager interface {
    EnvironmentAccessor
    AccessorHandler
}

// EnvironmentAccessor provides access to the stored environments.
type EnvironmentAccessor interface {
    // ListEnvironments returns all execution environments known by Poseidon.
    ListEnvironments() []ExecutionEnvironment
@@ -83,7 +74,18 @@ type Manager interface {
    // EnvironmentStatistics returns statistical data for each execution environment.
    EnvironmentStatistics() map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData
}

// AccessorHandler is one handler in the chain of responsibility of runner accessors.
// Each runner accessor can handle different requests.
type AccessorHandler interface {
    Accessor
    SetNextHandler(m AccessorHandler)
    NextHandler() AccessorHandler
}

// Accessor manages the lifecycle of Runner.
type Accessor interface {
    // Claim returns a new runner. The runner is deleted after duration seconds if duration is not 0.
    // It makes sure that the runner is not in use yet and returns an error if no runner could be provided.
    Claim(id dto.EnvironmentID, duration int) (Runner, error)
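Splitting the former Manager interface this way means every Manager is also an AccessorHandler (and therefore an Accessor), while components that only consume runners, such as the inactivity timer changed above, can depend on the narrower Accessor. The following compile-time checks are a sketch (not part of the commit) of how these relationships could be asserted for the types introduced here:

// Hypothetical compile-time interface checks in package runner; they would
// stop compiling if the chain types no longer satisfied these interfaces.
var (
    _ Manager         = (*NomadRunnerManager)(nil)
    _ Manager         = (*AWSRunnerManager)(nil)
    _ AccessorHandler = (*AbstractManager)(nil)
)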
@@ -100,201 +102,3 @@ type Manager interface {
    // It should be called during the startup process (e.g. on creation of the Manager).
    Load()
}

type NomadRunnerManager struct {
    apiClient    nomad.ExecutorAPI
    environments EnvironmentStorage
    usedRunners  Storage
}

// NewNomadRunnerManager creates a new runner manager that keeps track of all runners.
// It uses the apiClient for all requests and runs a background task to keep the runners in sync with Nomad.
// If you cancel the context the background synchronization will be stopped.
func NewNomadRunnerManager(apiClient nomad.ExecutorAPI, ctx context.Context) *NomadRunnerManager {
    m := &NomadRunnerManager{
        apiClient,
        NewLocalEnvironmentStorage(),
        NewLocalRunnerStorage(),
    }
    go m.keepRunnersSynced(ctx)
    return m
}

func (m *NomadRunnerManager) ListEnvironments() []ExecutionEnvironment {
    return m.environments.List()
}

func (m *NomadRunnerManager) GetEnvironment(id dto.EnvironmentID) (ExecutionEnvironment, bool) {
    return m.environments.Get(id)
}

func (m *NomadRunnerManager) StoreEnvironment(environment ExecutionEnvironment) {
    m.environments.Add(environment)
}

func (m *NomadRunnerManager) DeleteEnvironment(id dto.EnvironmentID) {
    m.environments.Delete(id)
}

func (m *NomadRunnerManager) EnvironmentStatistics() map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData {
    environments := make(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData)
    for _, e := range m.environments.List() {
        environments[e.ID()] = &dto.StatisticalExecutionEnvironmentData{
            ID:                 int(e.ID()),
            PrewarmingPoolSize: e.PrewarmingPoolSize(),
            IdleRunners:        uint(e.IdleRunnerCount()),
            UsedRunners:        0,
        }
    }

    for _, r := range m.usedRunners.List() {
        id, err := nomad.EnvironmentIDFromRunnerID(r.ID())
        if err != nil {
            log.WithError(err).Error("Stored runners must have correct IDs")
        }
        environments[id].UsedRunners++
    }
    return environments
}

func (m *NomadRunnerManager) Claim(environmentID dto.EnvironmentID, duration int) (Runner, error) {
    environment, ok := m.environments.Get(environmentID)
    if !ok {
        return nil, ErrUnknownExecutionEnvironment
    }
    runner, ok := environment.Sample(m.apiClient)
    if !ok {
        return nil, ErrNoRunnersAvailable
    }

    m.usedRunners.Add(runner)
    go m.markRunnerAsUsed(runner, duration)

    runner.SetupTimeout(time.Duration(duration) * time.Second)
    return runner, nil
}

func (m *NomadRunnerManager) markRunnerAsUsed(runner Runner, timeoutDuration int) {
    err := m.apiClient.MarkRunnerAsUsed(runner.ID(), timeoutDuration)
    if err != nil {
        err = m.Return(runner)
        if err != nil {
            log.WithError(err).WithField("runnerID", runner.ID()).Error("can't mark runner as used and can't return runner")
        }
    }
}

func (m *NomadRunnerManager) Get(runnerID string) (Runner, error) {
    runner, ok := m.usedRunners.Get(runnerID)
    if !ok {
        return nil, ErrRunnerNotFound
    }
    return runner, nil
}

func (m *NomadRunnerManager) Return(r Runner) error {
    r.StopTimeout()
    err := m.apiClient.DeleteJob(r.ID())
    if err != nil {
        return fmt.Errorf("error deleting runner in Nomad: %w", err)
    }
    m.usedRunners.Delete(r.ID())
    return nil
}

func (m *NomadRunnerManager) Load() {
    for _, environment := range m.environments.List() {
        environmentLogger := log.WithField("environmentID", environment.ID())
        runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID())
        if err != nil {
            environmentLogger.WithError(err).Warn("Error fetching the runner jobs")
        }
        for _, job := range runnerJobs {
            m.loadSingleJob(job, environmentLogger, environment)
        }
        err = environment.ApplyPrewarmingPoolSize(m.apiClient)
        if err != nil {
            environmentLogger.WithError(err).Error("Couldn't scale environment")
        }
    }
}

func (m *NomadRunnerManager) loadSingleJob(job *nomadApi.Job, environmentLogger *logrus.Entry,
    environment ExecutionEnvironment) {
    configTaskGroup := nomad.FindTaskGroup(job, nomad.ConfigTaskGroupName)
    if configTaskGroup == nil {
        environmentLogger.Infof("Couldn't find config task group in job %s, skipping ...", *job.ID)
        return
    }
    isUsed := configTaskGroup.Meta[nomad.ConfigMetaUsedKey] == nomad.ConfigMetaUsedValue
    portMappings, err := m.apiClient.LoadRunnerPortMappings(*job.ID)
    if err != nil {
        environmentLogger.WithError(err).Warn("Error loading runner portMappings")
        return
    }
    newJob := NewNomadJob(*job.ID, portMappings, m.apiClient, m)
    if isUsed {
        m.usedRunners.Add(newJob)
        timeout, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaTimeoutKey])
        if err != nil {
            environmentLogger.WithError(err).Warn("Error loading timeout from meta values")
        } else {
            newJob.SetupTimeout(time.Duration(timeout) * time.Second)
        }
    } else {
        environment.AddRunner(newJob)
    }
}

func (m *NomadRunnerManager) keepRunnersSynced(ctx context.Context) {
    retries := 0
    for ctx.Err() == nil {
        err := m.apiClient.WatchEventStream(ctx, m.onAllocationAdded, m.onAllocationStopped)
        retries += 1
        log.WithError(err).Errorf("Stopped updating the runners! Retry %v", retries)
        <-time.After(time.Second)
    }
}

func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation) {
    log.WithField("id", alloc.JobID).Debug("Runner started")

    if nomad.IsEnvironmentTemplateID(alloc.JobID) {
        return
    }

    environmentID, err := nomad.EnvironmentIDFromRunnerID(alloc.JobID)
    if err != nil {
        log.WithError(err).Warn("Allocation could not be added")
        return
    }

    environment, ok := m.environments.Get(environmentID)
    if ok {
        var mappedPorts []nomadApi.PortMapping
        if alloc.AllocatedResources != nil {
            mappedPorts = alloc.AllocatedResources.Shared.Ports
        }
        environment.AddRunner(NewNomadJob(alloc.JobID, mappedPorts, m.apiClient, m))
    }
}

func (m *NomadRunnerManager) onAllocationStopped(alloc *nomadApi.Allocation) {
    log.WithField("id", alloc.JobID).Debug("Runner stopped")

    if nomad.IsEnvironmentTemplateID(alloc.JobID) {
        return
    }

    environmentID, err := nomad.EnvironmentIDFromRunnerID(alloc.JobID)
    if err != nil {
        log.WithError(err).Warn("Stopped allocation can not be handled")
        return
    }

    m.usedRunners.Delete(alloc.JobID)
    environment, ok := m.environments.Get(environmentID)
    if ok {
        environment.DeleteRunner(alloc.JobID)
    }
}
@@ -123,6 +123,22 @@ func (_m *ManagerMock) Load() {
    _m.Called()
}

// NextHandler provides a mock function with given fields:
func (_m *ManagerMock) NextHandler() AccessorHandler {
    ret := _m.Called()

    var r0 AccessorHandler
    if rf, ok := ret.Get(0).(func() AccessorHandler); ok {
        r0 = rf()
    } else {
        if ret.Get(0) != nil {
            r0 = ret.Get(0).(AccessorHandler)
        }
    }

    return r0
}

// Return provides a mock function with given fields: r
func (_m *ManagerMock) Return(r Runner) error {
    ret := _m.Called(r)
@@ -137,6 +153,11 @@ func (_m *ManagerMock) Return(r Runner) error {
    return r0
}

// SetNextHandler provides a mock function with given fields: m
func (_m *ManagerMock) SetNextHandler(m AccessorHandler) {
    _m.Called(m)
}

// StoreEnvironment provides a mock function with given fields: environment
func (_m *ManagerMock) StoreEnvironment(environment ExecutionEnvironment) {
    _m.Called(environment)
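Because the mock now also implements the chain methods, tests can stub an entire next handler and check that calls fall through and that wrapped errors keep their identity. A hypothetical testify-based test sketch (the test name and expectations are illustrative, not part of the commit):

package runner

import (
    "testing"

    "github.com/openHPI/poseidon/pkg/dto"
    "github.com/stretchr/testify/assert"
)

func TestClaimFallsThroughToNextHandler(t *testing.T) {
    next := &ManagerMock{}
    next.On("Claim", dto.EnvironmentID(1), 60).Return(nil, ErrNullObject)

    manager := NewAWSRunnerManager()
    manager.SetNextHandler(next)

    // The AWS manager delegates and wraps the error; errors.Is still matches.
    _, err := manager.Claim(dto.EnvironmentID(1), 60)
    assert.ErrorIs(t, err, ErrNullObject)
    next.AssertExpectations(t)
}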
internal/runner/nomad_manager.go (new file, 221 lines)
@@ -0,0 +1,221 @@
package runner

import (
    "context"
    "errors"
    "fmt"
    nomadApi "github.com/hashicorp/nomad/api"
    "github.com/openHPI/poseidon/internal/nomad"
    "github.com/openHPI/poseidon/pkg/dto"
    "github.com/openHPI/poseidon/pkg/logging"
    "github.com/sirupsen/logrus"
    "strconv"
    "time"
)

var (
    log                            = logging.GetLogger("runner")
    ErrUnknownExecutionEnvironment = errors.New("execution environment not found")
    ErrNoRunnersAvailable          = errors.New("no runners available for this execution environment")
    ErrRunnerNotFound              = errors.New("no runner found with this id")
)

type NomadRunnerManager struct {
    *AbstractManager
    apiClient    nomad.ExecutorAPI
    environments EnvironmentStorage
    usedRunners  Storage
}

// NewNomadRunnerManager creates a new runner manager that keeps track of all runners.
// It uses the apiClient for all requests and runs a background task to keep the runners in sync with Nomad.
// If you cancel the context the background synchronization will be stopped.
func NewNomadRunnerManager(apiClient nomad.ExecutorAPI, ctx context.Context) *NomadRunnerManager {
    m := &NomadRunnerManager{
        &AbstractManager{},
        apiClient,
        NewLocalEnvironmentStorage(),
        NewLocalRunnerStorage(),
    }
    go m.keepRunnersSynced(ctx)
    return m
}

func (m *NomadRunnerManager) ListEnvironments() []ExecutionEnvironment {
    return m.environments.List()
}

func (m *NomadRunnerManager) GetEnvironment(id dto.EnvironmentID) (ExecutionEnvironment, bool) {
    return m.environments.Get(id)
}

func (m *NomadRunnerManager) StoreEnvironment(environment ExecutionEnvironment) {
    m.environments.Add(environment)
}

func (m *NomadRunnerManager) DeleteEnvironment(id dto.EnvironmentID) {
    m.environments.Delete(id)
}

func (m *NomadRunnerManager) EnvironmentStatistics() map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData {
    environments := make(map[dto.EnvironmentID]*dto.StatisticalExecutionEnvironmentData)
    for _, e := range m.environments.List() {
        environments[e.ID()] = &dto.StatisticalExecutionEnvironmentData{
            ID:                 int(e.ID()),
            PrewarmingPoolSize: e.PrewarmingPoolSize(),
            IdleRunners:        uint(e.IdleRunnerCount()),
            UsedRunners:        0,
        }
    }

    for _, r := range m.usedRunners.List() {
        id, err := nomad.EnvironmentIDFromRunnerID(r.ID())
        if err != nil {
            log.WithError(err).Error("Stored runners must have correct IDs")
        }
        environments[id].UsedRunners++
    }
    return environments
}

func (m *NomadRunnerManager) Claim(environmentID dto.EnvironmentID, duration int) (Runner, error) {
    environment, ok := m.environments.Get(environmentID)
    if !ok {
        return nil, ErrUnknownExecutionEnvironment
    }
    runner, ok := environment.Sample(m.apiClient)
    if !ok {
        return nil, ErrNoRunnersAvailable
    }

    m.usedRunners.Add(runner)
    go m.markRunnerAsUsed(runner, duration)

    runner.SetupTimeout(time.Duration(duration) * time.Second)
    return runner, nil
}

func (m *NomadRunnerManager) markRunnerAsUsed(runner Runner, timeoutDuration int) {
    err := m.apiClient.MarkRunnerAsUsed(runner.ID(), timeoutDuration)
    if err != nil {
        err = m.Return(runner)
        if err != nil {
            log.WithError(err).WithField("runnerID", runner.ID()).Error("can't mark runner as used and can't return runner")
        }
    }
}

func (m *NomadRunnerManager) Get(runnerID string) (Runner, error) {
    runner, ok := m.usedRunners.Get(runnerID)
    if !ok {
        return nil, ErrRunnerNotFound
    }
    return runner, nil
}

func (m *NomadRunnerManager) Return(r Runner) error {
    r.StopTimeout()
    err := m.apiClient.DeleteJob(r.ID())
    if err != nil {
        return fmt.Errorf("error deleting runner in Nomad: %w", err)
    }
    m.usedRunners.Delete(r.ID())
    return nil
}

func (m *NomadRunnerManager) Load() {
    for _, environment := range m.environments.List() {
        environmentLogger := log.WithField("environmentID", environment.ID())
        runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID())
        if err != nil {
            environmentLogger.WithError(err).Warn("Error fetching the runner jobs")
        }
        for _, job := range runnerJobs {
            m.loadSingleJob(job, environmentLogger, environment)
        }
        err = environment.ApplyPrewarmingPoolSize(m.apiClient)
        if err != nil {
            environmentLogger.WithError(err).Error("Couldn't scale environment")
        }
    }
}

func (m *NomadRunnerManager) loadSingleJob(job *nomadApi.Job, environmentLogger *logrus.Entry,
    environment ExecutionEnvironment) {
    configTaskGroup := nomad.FindTaskGroup(job, nomad.ConfigTaskGroupName)
    if configTaskGroup == nil {
        environmentLogger.Infof("Couldn't find config task group in job %s, skipping ...", *job.ID)
        return
    }
    isUsed := configTaskGroup.Meta[nomad.ConfigMetaUsedKey] == nomad.ConfigMetaUsedValue
    portMappings, err := m.apiClient.LoadRunnerPortMappings(*job.ID)
    if err != nil {
        environmentLogger.WithError(err).Warn("Error loading runner portMappings")
        return
    }
    newJob := NewNomadJob(*job.ID, portMappings, m.apiClient, m)
    if isUsed {
        m.usedRunners.Add(newJob)
        timeout, err := strconv.Atoi(configTaskGroup.Meta[nomad.ConfigMetaTimeoutKey])
        if err != nil {
            environmentLogger.WithError(err).Warn("Error loading timeout from meta values")
        } else {
            newJob.SetupTimeout(time.Duration(timeout) * time.Second)
        }
    } else {
        environment.AddRunner(newJob)
    }
}

func (m *NomadRunnerManager) keepRunnersSynced(ctx context.Context) {
    retries := 0
    for ctx.Err() == nil {
        err := m.apiClient.WatchEventStream(ctx, m.onAllocationAdded, m.onAllocationStopped)
        retries += 1
        log.WithError(err).Errorf("Stopped updating the runners! Retry %v", retries)
        <-time.After(time.Second)
    }
}

func (m *NomadRunnerManager) onAllocationAdded(alloc *nomadApi.Allocation) {
    log.WithField("id", alloc.JobID).Debug("Runner started")

    if nomad.IsEnvironmentTemplateID(alloc.JobID) {
        return
    }

    environmentID, err := nomad.EnvironmentIDFromRunnerID(alloc.JobID)
    if err != nil {
        log.WithError(err).Warn("Allocation could not be added")
        return
    }

    environment, ok := m.environments.Get(environmentID)
    if ok {
        var mappedPorts []nomadApi.PortMapping
        if alloc.AllocatedResources != nil {
            mappedPorts = alloc.AllocatedResources.Shared.Ports
        }
        environment.AddRunner(NewNomadJob(alloc.JobID, mappedPorts, m.apiClient, m))
    }
}

func (m *NomadRunnerManager) onAllocationStopped(alloc *nomadApi.Allocation) {
    log.WithField("id", alloc.JobID).Debug("Runner stopped")

    if nomad.IsEnvironmentTemplateID(alloc.JobID) {
        return
    }

    environmentID, err := nomad.EnvironmentIDFromRunnerID(alloc.JobID)
    if err != nil {
        log.WithError(err).Warn("Stopped allocation can not be handled")
        return
    }

    m.usedRunners.Delete(alloc.JobID)
    environment, ok := m.environments.Get(environmentID)
    if ok {
        environment.DeleteRunner(alloc.JobID)
    }
}
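From a caller's perspective the lifecycle is the same for every manager in the chain: Claim reserves an idle runner and schedules its inactivity timeout, Return stops the timeout and deletes the Nomad job. A hedged sketch of that flow against the Accessor interface (helper name and error handling are illustrative; it assumes imports of fmt and of the runner and dto packages):

// runOnce is a hypothetical caller that works with any Accessor in the chain.
func runOnce(accessor runner.Accessor, environmentID dto.EnvironmentID) error {
    r, err := accessor.Claim(environmentID, 30) // inactivity timeout of 30 seconds
    if err != nil {
        return fmt.Errorf("claiming runner: %w", err)
    }
    // ... copy files to the runner and execute commands on it ...
    if err := accessor.Return(r); err != nil {
        return fmt.Errorf("returning runner: %w", err)
    }
    return nil
}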
@@ -79,12 +79,12 @@ type NomadJob struct {
    id           string
    portMappings []nomadApi.PortMapping
    api          nomad.ExecutorAPI
-   manager      Manager
+   manager      Accessor
}

// NewNomadJob creates a new NomadJob with the provided id.
func NewNomadJob(id string, portMappings []nomadApi.PortMapping,
-   apiClient nomad.ExecutorAPI, manager Manager,
+   apiClient nomad.ExecutorAPI, manager Accessor,
) *NomadJob {
    job := &NomadJob{
        id: id,
@@ -390,6 +390,6 @@ func (s *UpdateFileSystemTestSuite) readFilesFromTarArchive(tarArchive io.Reader
}

// NewRunner creates a new runner with the provided id and manager.
-func NewRunner(id string, manager Manager) Runner {
+func NewRunner(id string, manager Accessor) Runner {
    return NewNomadJob(id, nil, nil, manager)
}