added k8s stub adapter for execution environment
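A minimal sketch of how this stub might be wired in, assuming an already constructed kubernetes.ExecutorAPI and the relevant imports are available; the helper function below is hypothetical and only illustrates the intended call sequence:

// Sketch only: setupKubernetesRunnerManagement is a hypothetical wiring helper.
func setupKubernetesRunnerManagement(ctx context.Context, apiClient kubernetes.ExecutorAPI) *runner.KubernetesRunnerManager {
    manager := runner.NewKubernetesRunnerManager(&apiClient, ctx)
    manager.Load() // recover existing runners for all registered environments
    return manager
}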
125 internal/runner/kubernetes_manager.go Normal file
@@ -0,0 +1,125 @@
package runner

import (
    "context"
    "fmt"
    "strconv"
    "time"

    "github.com/openHPI/poseidon/internal/kubernetes"
    "github.com/openHPI/poseidon/internal/nomad"
    "github.com/openHPI/poseidon/pkg/dto"
    "github.com/openHPI/poseidon/pkg/storage"
    "github.com/openHPI/poseidon/pkg/util"
    appv1 "k8s.io/api/apps/v1"
)

// KubernetesRunnerManager is a stub runner manager for runners backed by Kubernetes deployments.
type KubernetesRunnerManager struct {
    *AbstractManager
    apiClient            kubernetes.ExecutorAPI
    reloadingEnvironment storage.Storage[*alertData]
}

// NewKubernetesRunnerManager creates a new runner manager that uses the given Kubernetes executor API.
func NewKubernetesRunnerManager(apiClient *kubernetes.ExecutorAPI, ctx context.Context) *KubernetesRunnerManager {
    return &KubernetesRunnerManager{
        AbstractManager:      NewAbstractManager(ctx),
        apiClient:            *apiClient,
        reloadingEnvironment: storage.NewLocalStorage[*alertData](),
    }
}

// Load recovers all runners for all existing environments.
func (k *KubernetesRunnerManager) Load() {
    log.Info("Loading runners")
    newUsedRunners := storage.NewLocalStorage[Runner]()
    for _, environment := range k.ListEnvironments() {
        usedRunners, err := k.loadEnvironment(environment)
        if err != nil {
            log.WithError(err).WithField(dto.KeyEnvironmentID, environment.ID().ToString()).
                Warn("Failed loading environment. Skipping...")
            continue
        }
        for _, r := range usedRunners.List() {
            newUsedRunners.Add(r.ID(), r)
        }
    }
    // TODO MISSING IMPLEMENTATION
    // k.updateUsedRunners(newUsedRunners, true)
}

// loadEnvironment recovers all runner deployments of a single environment and returns the used ones.
func (k *KubernetesRunnerManager) loadEnvironment(environment ExecutionEnvironment) (used storage.Storage[Runner], err error) {
    used = storage.NewLocalStorage[Runner]()

    runnerJobs, err := k.apiClient.LoadRunnerJobs(environment.ID())
    if err != nil {
        return nil, fmt.Errorf("failed fetching the runner jobs: %w", err)
    }
    for _, job := range runnerJobs {
        r, isUsed, err := k.loadSingleJob(job, environment)
        if err != nil {
            log.WithError(err).WithField(dto.KeyEnvironmentID, environment.ID().ToString()).
                WithField("used", isUsed).Warn("Failed loading job. Skipping...")
            continue
        } else if isUsed {
            used.Add(r.ID(), r)
        }
    }
    err = environment.ApplyPrewarmingPoolSize()
    if err != nil {
        return used, fmt.Errorf("couldn't scale environment: %w", err)
    }
    return used, nil
}

// loadSingleJob recovers a single runner deployment and reports whether it is currently in use.
func (k *KubernetesRunnerManager) loadSingleJob(deployment *appv1.Deployment, environment ExecutionEnvironment) (r Runner, isUsed bool, err error) {
    configTaskGroup := deployment.Spec.Template
    if len(configTaskGroup.Spec.Containers) == 0 {
        return nil, false, fmt.Errorf("%w, %s", nomad.ErrorMissingTaskGroup, deployment.Name)
    }

    // The nomad.ConfigMeta* keys are reused as pod-template annotations by this Kubernetes stub.
    isUsed = configTaskGroup.Annotations[nomad.ConfigMetaUsedKey] == nomad.ConfigMetaUsedValue
    portMappings, err := k.apiClient.LoadRunnerPortMappings(deployment.Name)
    if err != nil {
        return nil, false, fmt.Errorf("error loading runner portMappings: %w", err)
    }

    newJob := NewKubernetesDeployment(deployment.Name, portMappings, k.apiClient, k.onRunnerDestroyed)
    log.WithField("isUsed", isUsed).WithField(dto.KeyRunnerID, newJob.ID()).Debug("Recovered Runner")
    if isUsed {
        timeout, err := strconv.Atoi(configTaskGroup.ObjectMeta.Annotations[nomad.ConfigMetaTimeoutKey])
        if err != nil {
            log.WithField(dto.KeyRunnerID, newJob.ID()).WithError(err).Warn("failed loading timeout from meta values")
            timeout = int(nomad.RunnerTimeoutFallback.Seconds())
            go k.markRunnerAsUsed(newJob, timeout)
        }
        newJob.SetupTimeout(time.Duration(timeout) * time.Second)
    } else {
        environment.AddRunner(newJob)
    }
    return newJob, isUsed, nil
}

// markRunnerAsUsed marks the runner as used via the executor API, retrying with exponential backoff.
// If marking fails permanently, the runner is returned to avoid an inconsistent state.
func (k *KubernetesRunnerManager) markRunnerAsUsed(runner Runner, timeoutDuration int) {
    err := util.RetryExponential(func() (err error) {
        if err = k.apiClient.MarkRunnerAsUsed(runner.ID(), timeoutDuration); err != nil {
            err = fmt.Errorf("cannot mark runner as used: %w", err)
        }
        return
    })
    if err != nil {
        log.WithError(err).WithField(dto.KeyRunnerID, runner.ID()).Error("cannot mark runner as used")
        err := k.Return(runner)
        if err != nil {
            log.WithError(err).WithField(dto.KeyRunnerID, runner.ID()).Error("can't mark runner as used and can't return runner")
        }
    }
}

// onRunnerDestroyed removes a destroyed runner from the used runners and from its environment.
func (k *KubernetesRunnerManager) onRunnerDestroyed(r Runner) error {
    k.usedRunners.Delete(r.ID())

    environment, ok := k.GetEnvironment(r.Environment())
    if ok {
        environment.DeleteRunner(r.ID())
    }
    return nil
}
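For reference, loadSingleJob reads the used flag and the idle timeout from annotations on the deployment's pod template, reusing the nomad.ConfigMeta* keys. A rough sketch of the metadata shape it expects, assuming the corev1 ("k8s.io/api/core/v1") and metav1 ("k8s.io/apimachinery/pkg/apis/meta/v1") imports; the name and annotation values below are assumed examples, not values taken from the repository:

// exampleRunnerDeployment is an illustration-only helper showing the metadata that loadSingleJob inspects.
func exampleRunnerDeployment() *appv1.Deployment {
    return &appv1.Deployment{
        ObjectMeta: metav1.ObjectMeta{Name: "example-runner"}, // assumed example name
        Spec: appv1.DeploymentSpec{
            Template: corev1.PodTemplateSpec{
                ObjectMeta: metav1.ObjectMeta{
                    Annotations: map[string]string{
                        nomad.ConfigMetaUsedKey:    nomad.ConfigMetaUsedValue, // runner is claimed by a user
                        nomad.ConfigMetaTimeoutKey: "300",                     // inactivity timeout in seconds (assumed value)
                    },
                },
            },
        },
    }
}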