added k8s stub adapter for execution environment
internal/kubernetes/api_querier.go (new file)
@@ -0,0 +1,122 @@
package kubernetes

import (
	"context"
	"errors"
	"io"

	nomadApi "github.com/hashicorp/nomad/api"
	appv1 "k8s.io/api/apps/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

var ErrorNoAllocationFound = errors.New("no allocation found")

// apiQuerier provides access to the Kubernetes functionality.
type apiQuerier interface {
	// init prepares an apiClient to be able to communicate with the provided Kubernetes API.
	init(kubernetesConfig *rest.Config) (err error)

	// LoadJobList loads the list of deployments from the Kubernetes API.
	LoadJobList() (list []*appv1.DeploymentList, err error)

	// JobScale returns the scale of the passed job.
	JobScale(jobID string) (jobScale uint, err error)

	// SetJobScale sets the scaling count of the passed job.
	SetJobScale(jobID string, count uint, reason string) (err error)

	// DeleteDeployment deletes the deployment with the given name.
	DeleteDeployment(name string) (err error)

	// Execute runs a command in the passed job.
	Execute(jobID string, ctx context.Context, command string, tty bool,
		stdin io.Reader, stdout, stderr io.Writer) (int, error)

	// listDeployments loads all deployments in the given namespace.
	listDeployments(namespace string) (jobListStub []*appv1.DeploymentList, err error)

	// deployment returns the deployment with the given name.
	deployment(name string) (deployment appv1.Deployment, err error)

	// listAllocations loads all allocations.
	listAllocations() (allocationListStub []*nomadApi.AllocationListStub, err error)

	// allocation returns the first allocation of the given job.
	allocation(jobID string) (*nomadApi.Allocation, error)

	// RegisterKubernetesDeployment registers a deployment with Kubernetes.
	// It returns the deployment ID that can be used when listening to the Kubernetes event stream.
	RegisterKubernetesDeployment(deployment appv1.Deployment) (string, error)

	// EventStream returns an event stream filtered to return only allocation and evaluation events.
	EventStream(ctx context.Context) (<-chan *nomadApi.Events, error)
}

// kubernetesAPIClient implements the apiQuerier interface and provides access to a real Kubernetes API.
type kubernetesAPIClient struct {
	client    *kubernetes.Clientset
	namespace string
}

func (k *kubernetesAPIClient) init(kubernetesConfig *rest.Config) (err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) LoadJobList() (list []*appv1.DeploymentList, err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) JobScale(jobID string) (jobScale uint, err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) SetJobScale(jobID string, count uint, reason string) (err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) DeleteDeployment(name string) (err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) Execute(jobID string, ctx context.Context, command string, tty bool,
	stdin io.Reader, stdout, stderr io.Writer) (int, error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) listDeployments(namespace string) (jobListStub []*appv1.DeploymentList, err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) deployment(name string) (deployment appv1.Deployment, err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) listAllocations() (allocationListStub []*nomadApi.AllocationListStub, err error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) allocation(jobID string) (*nomadApi.Allocation, error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) RegisterKubernetesDeployment(deployment appv1.Deployment) (string, error) {
	// TODO: implement me
	panic("implement me")
}

func (k *kubernetesAPIClient) EventStream(ctx context.Context) (<-chan *nomadApi.Events, error) {
	// TODO: implement me
	panic("implement me")
}
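As a sketch of where a client-go implementation could go (an assumption, not part of this commit): init maps onto kubernetes.NewForConfig, and the deployment getter onto the typed AppsV1 client. Note that the []*appv1.DeploymentList return types above look like leftovers from the Nomad job-list stubs; a list of deployments would more naturally be []appv1.Deployment.

// Sketch only: possible bodies for two of the stubs above.
package kubernetes

import (
	"context"
	"fmt"

	appv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/rest"
)

func (k *kubernetesAPIClient) init(kubernetesConfig *rest.Config) (err error) {
	// NewForConfig builds the typed Clientset that the other methods would use.
	k.client, err = kubernetes.NewForConfig(kubernetesConfig)
	return err
}

func (k *kubernetesAPIClient) deployment(name string) (deployment appv1.Deployment, err error) {
	// Get fetches a single deployment by name from the adapter's namespace.
	result, err := k.client.AppsV1().Deployments(k.namespace).Get(context.Background(), name, metav1.GetOptions{})
	if err != nil {
		return appv1.Deployment{}, fmt.Errorf("couldn't load deployment %s: %w", name, err)
	}
	return *result, nil
}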
internal/kubernetes/deployment.go (new file)
@@ -0,0 +1,125 @@
package kubernetes

import (
	"context"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"

	nomadApi "github.com/hashicorp/nomad/api"
	"github.com/openHPI/poseidon/pkg/dto"
	appv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
)

const (
	TemplateJobPrefix     = "template"
	TaskGroupName         = "default-group"
	TaskName              = "default-task"
	TaskCount             = 1
	TaskDriver            = "docker"
	TaskCommand           = "sleep"
	ConfigTaskGroupName   = "config"
	ConfigTaskName        = "config"
	ConfigTaskDriver      = "exec"
	ConfigTaskCommand     = "true"
	ConfigMetaUsedKey     = "used"
	ConfigMetaUsedValue   = "true"
	ConfigMetaUnusedValue = "false"
	ConfigMetaTimeoutKey  = "timeout"
	ConfigMetaPoolSizeKey = "prewarmingPoolSize"
	TemplateJobNameParts  = 2
	RegisterTimeout       = 10 * time.Second
	RunnerTimeoutFallback = 60 * time.Second
)

var (
	ErrorInvalidJobID     = errors.New("invalid job id")
	ErrorMissingTaskGroup = errors.New("couldn't find config task group in job")
	TaskArgs              = []string{"infinity"}
)
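
// Editorial note (sketch, not part of this commit): the ConfigMeta* keys above
// come from Nomad job meta. The closest Kubernetes analogue would be
// annotations on the deployment's pod template, e.g.:
//
//	deployment.Spec.Template.Annotations = map[string]string{
//		ConfigMetaUsedKey:    ConfigMetaUnusedValue,
//		ConfigMetaTimeoutKey: strconv.Itoa(int(RunnerTimeoutFallback.Seconds())),
//	}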
func (a *APIClient) RegisterRunnerJob(template *appv1.Deployment) error {
	evalID, err := a.apiQuerier.RegisterKubernetesDeployment(*template)
	if err != nil {
		return fmt.Errorf("couldn't register runner job: %w", err)
	}

	registerTimeout, cancel := context.WithTimeout(context.Background(), RegisterTimeout)
	defer cancel()
	return a.MonitorEvaluation(evalID, registerTimeout)
}

// SetForcePullFlag sets the flag of a job if the image should be pulled again.
func SetForcePullFlag(deployment *appv1.Deployment, value bool) {
	// Iterate by index: ranging by value would only mutate a copy of the container.
	for i := range deployment.Spec.Template.Spec.Containers {
		container := &deployment.Spec.Template.Spec.Containers[i]
		if container.Name != TaskName {
			continue
		}
		if value {
			container.ImagePullPolicy = v1.PullAlways
		} else {
			container.ImagePullPolicy = v1.PullIfNotPresent
		}
	}
}

// IsEnvironmentTemplateID checks if the passed job id belongs to a template job.
func IsEnvironmentTemplateID(jobID string) bool {
	parts := strings.Split(jobID, "-")
	if len(parts) != TemplateJobNameParts || parts[0] != TemplateJobPrefix {
		return false
	}

	_, err := EnvironmentIDFromTemplateJobID(jobID)
	return err == nil
}

// RunnerJobID returns the job id of the runner with the given environmentID and id.
func RunnerJobID(environmentID dto.EnvironmentID, id string) string {
	return fmt.Sprintf("%d-%s", environmentID, id)
}

// TemplateJobID returns the id of the template job for the environment with the given id.
func TemplateJobID(id dto.EnvironmentID) string {
	return fmt.Sprintf("%s-%d", TemplateJobPrefix, id)
}

// EnvironmentIDFromRunnerID returns the environment id that is part of the passed runner job id.
func EnvironmentIDFromRunnerID(jobID string) (dto.EnvironmentID, error) {
	return partOfJobID(jobID, 0)
}

// EnvironmentIDFromTemplateJobID returns the environment id that is part of the passed template job id.
func EnvironmentIDFromTemplateJobID(id string) (dto.EnvironmentID, error) {
	return partOfJobID(id, 1)
}

func partOfJobID(id string, part uint) (dto.EnvironmentID, error) {
	parts := strings.Split(id, "-")
	// strings.Split never returns an empty slice, so guard the index instead.
	if int(part) >= len(parts) {
		return 0, fmt.Errorf("malformed job id %q: %w", id, ErrorInvalidJobID)
	}
	environmentID, err := strconv.Atoi(parts[part])
	if err != nil {
		return 0, fmt.Errorf("invalid environment id part %v: %w", err, ErrorInvalidJobID)
	}
	return dto.EnvironmentID(environmentID), nil
}

func isOOMKilled(alloc *nomadApi.Allocation) bool {
	state, ok := alloc.TaskStates[TaskName]
	if !ok {
		return false
	}

	var oomKilledCount uint64
	for _, event := range state.Events {
		if oomString, ok := event.Details["oom_killed"]; ok {
			if oom, err := strconv.ParseBool(oomString); err == nil && oom {
				oomKilledCount++
			}
		}
	}
	return oomKilledCount >= state.Restarts
}
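The ID helpers above are pure string handling, so a short example pins down the format. A hypothetical test-file sketch (function name and values invented):

package kubernetes

import (
	"fmt"

	"github.com/openHPI/poseidon/pkg/dto"
)

// ExampleJobIDs walks a job id through the helpers in both directions.
func ExampleJobIDs() {
	templateID := TemplateJobID(dto.EnvironmentID(7))
	fmt.Println(templateID, IsEnvironmentTemplateID(templateID))

	runnerID := RunnerJobID(dto.EnvironmentID(7), "abc123")
	environmentID, err := EnvironmentIDFromRunnerID(runnerID)
	fmt.Println(runnerID, environmentID, err)
	// Output:
	// template-7 true
	// 7-abc123 7 <nil>
}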
internal/kubernetes/kubernetes.go (new file)
@@ -0,0 +1,221 @@
package kubernetes

import (
	"context"
	"errors"
	"fmt"
	"io"
	"strings"
	"time"

	nomadApi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/influxdata/influxdb-client-go/v2/api/write"
	"github.com/openHPI/poseidon/pkg/dto"
	"github.com/openHPI/poseidon/pkg/logging"
	"github.com/openHPI/poseidon/pkg/monitoring"
	"github.com/openHPI/poseidon/pkg/storage"
	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/rest"
)

var (
	log                              = logging.GetLogger("kubernetes")
	ErrorExecutorCommunicationFailed = errors.New("communication with executor failed")
	ErrorEvaluation                  = errors.New("evaluation could not complete")
	ErrorPlacingAllocations          = errors.New("failed to place all allocations")
	ErrorLoadingJob                  = errors.New("failed to load job")
	ErrorNoAllocatedResourcesFound   = errors.New("no allocated resources found")
	ErrorLocalDestruction            RunnerDeletedReason = errors.New("the destruction should not cause external changes")
	ErrorOOMKilled                   RunnerDeletedReason = fmt.Errorf("%s: %w", dto.ErrOOMKilled.Error(), ErrorLocalDestruction)
	ErrorAllocationRescheduled       RunnerDeletedReason = fmt.Errorf("the allocation was rescheduled: %w", ErrorLocalDestruction)
	ErrorAllocationStopped           RunnerDeletedReason = errors.New("the allocation was stopped")
	ErrorAllocationStoppedUnexpectedly RunnerDeletedReason = fmt.Errorf("%w unexpectedly", ErrorAllocationStopped)
	ErrorAllocationRescheduledUnexpectedly RunnerDeletedReason = fmt.Errorf(
		"%w correctly but rescheduled", ErrorAllocationStopped)
	// ErrorAllocationCompleted is the reason reported for an allocation that stopped because it completed.
	// We do not consider it an error but add it anyway for complete reporting.
	ErrorAllocationCompleted RunnerDeletedReason = errors.New("the allocation completed")
)

type allocationData struct {
	// allocClientStatus is the allocation state as reported by Nomad.
	allocClientStatus string
	// allocDesiredStatus defines whether the allocation wants to be running or to be stopped.
	allocDesiredStatus string
	jobID              string
	start              time.Time
	// stopExpected is used to suppress warnings that could be triggered by a race condition
	// between the inactivity timer and an external event leading to allocation rescheduling.
	stopExpected bool
	// allocNomadNode is just debugging information.
	allocNomadNode string
}

// resultChannelWriteTimeout is used to detect the error case where more elements are written into a channel than expected.
const resultChannelWriteTimeout = 10 * time.Millisecond

type DeletedAllocationProcessor func(jobID string, reason RunnerDeletedReason) (removedByPoseidon bool)
type NewAllocationProcessor func(*nomadApi.Allocation, time.Duration)

// AllocationProcessing includes the callbacks to interact with allocation events.
type AllocationProcessing struct {
	OnNew     NewAllocationProcessor
	OnDeleted DeletedAllocationProcessor
}

type RunnerDeletedReason error

// ExecutorAPI provides access to a container orchestration solution.
type ExecutorAPI interface {
	apiQuerier

	// LoadEnvironmentJobs loads all environment jobs.
	LoadEnvironmentJobs() ([]*appsv1.Deployment, error)

	// LoadRunnerJobs loads all runner jobs specific to the environment.
	LoadRunnerJobs(environmentID dto.EnvironmentID) ([]*appsv1.Deployment, error)

	// LoadRunnerIDs returns the IDs of all runners with the specified id prefix which are not about to
	// get stopped.
	LoadRunnerIDs(prefix string) (runnerIds []string, err error)

	// LoadRunnerPortMappings returns the mapped ports of the runner.
	LoadRunnerPortMappings(runnerID string) ([]v1.ContainerPort, error)

	// RegisterRunnerJob creates a runner job based on the template job.
	// It registers the job and waits until the registration completes.
	RegisterRunnerJob(template *appsv1.Deployment) error

	// MonitorEvaluation monitors the given evaluation ID.
	// It waits until the evaluation reaches one of the states complete, canceled, or failed.
	// If the evaluation was not successful, an error containing the failures is returned.
	// See also https://github.com/hashicorp/nomad/blob/7d5a9ecde95c18da94c9b6ace2565afbfdd6a40d/command/monitor.go#L175
	MonitorEvaluation(evaluationID string, ctx context.Context) error

	// WatchEventStream listens on the Nomad event stream for allocation and evaluation events.
	// Depending on the incoming event, the corresponding callback is executed.
	// Do not run multiple times simultaneously.
	WatchEventStream(ctx context.Context, callbacks *AllocationProcessing) error

	// ExecuteCommand executes the given command in the job/runner with the given id.
	// It writes the output of the command to stdout/stderr and reads input from stdin.
	// If tty is true, the command will run with a tty.
	// Iff privilegedExecution is true, the command will be executed privileged.
	// The command is passed in the shell form (not the exec array form) and will be executed in a shell.
	ExecuteCommand(jobID string, ctx context.Context, command string, tty bool, privilegedExecution bool,
		stdin io.Reader, stdout, stderr io.Writer) (int, error)

	// MarkRunnerAsUsed marks the runner with the given ID as used. It also stores the timeout duration in the metadata.
	MarkRunnerAsUsed(runnerID string, duration int) error
}

// APIClient implements the ExecutorAPI interface and can be used to perform different operations on the real
// Executor API and its return values.
type APIClient struct {
	apiQuerier
	evaluations storage.Storage[chan error]
	// allocations contain management data for all pending and running allocations.
	allocations storage.Storage[*allocationData]
	isListening bool
}

func (a *APIClient) LoadEnvironmentJobs() ([]*appsv1.Deployment, error) {
	// TODO: implement me
	panic("implement me")
}

func (a *APIClient) LoadRunnerJobs(environmentID dto.EnvironmentID) ([]*appsv1.Deployment, error) {
	go a.initializeAllocations(environmentID)

	runnerIDs, err := a.LoadRunnerIDs(RunnerJobID(environmentID, ""))
	if err != nil {
		return []*appsv1.Deployment{}, fmt.Errorf("couldn't load jobs: %w", err)
	}

	var occurredError error
	jobs := make([]*appsv1.Deployment, 0, len(runnerIDs))
	for _, id := range runnerIDs {
		job, err := a.apiQuerier.deployment(id)
		if err != nil {
			if occurredError == nil {
				occurredError = ErrorLoadingJob
			}
			occurredError = fmt.Errorf("%w: couldn't load job info for runner %s - %v", occurredError, id, err)
			continue
		}
		jobs = append(jobs, &job)
	}
	return jobs, occurredError
}

func (a *APIClient) LoadRunnerIDs(prefix string) (runnerIds []string, err error) {
	// TODO: implement me
	panic("implement me")
}

func (a *APIClient) LoadRunnerPortMappings(runnerID string) ([]v1.ContainerPort, error) {
	// TODO: implement me
	panic("implement me")
}

func (a *APIClient) MonitorEvaluation(evaluationID string, ctx context.Context) error {
	// TODO: implement me
	panic("implement me")
}

func (a *APIClient) WatchEventStream(ctx context.Context, callbacks *AllocationProcessing) error {
	// TODO: implement me
	panic("implement me")
}

func (a *APIClient) ExecuteCommand(jobID string, ctx context.Context, command string, tty bool,
	privilegedExecution bool, stdin io.Reader, stdout, stderr io.Writer) (int, error) {
	// TODO: implement me
	panic("implement me")
}

func (a *APIClient) MarkRunnerAsUsed(runnerID string, duration int) error {
	// TODO: implement me
	panic("implement me")
}

// NewExecutorAPI creates a new api client.
// One client is usually sufficient for the complete runtime of the API.
func NewExecutorAPI(kubernetesConfig *rest.Config) (ExecutorAPI, error) {
	client := &APIClient{
		apiQuerier:  &kubernetesAPIClient{},
		evaluations: storage.NewLocalStorage[chan error](),
		allocations: storage.NewMonitoredLocalStorage[*allocationData](monitoring.MeasurementNomadAllocations,
			func(p *write.Point, object *allocationData, _ storage.EventType) {
				p.AddTag(monitoring.InfluxKeyJobID, object.jobID)
				p.AddTag(monitoring.InfluxKeyClientStatus, object.allocClientStatus)
				p.AddTag(monitoring.InfluxKeyNomadNode, object.allocNomadNode)
			}, 0, nil),
	}
	err := client.init(kubernetesConfig)
	return client, err
}
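
// Hypothetical usage (sketch, not part of this commit): inside the cluster the
// rest.Config can come from rest.InClusterConfig(); for local development,
// clientcmd can build one from a kubeconfig file.
//
//	config, err := rest.InClusterConfig()
//	if err != nil {
//		log.WithError(err).Fatal("Could not load in-cluster config")
//	}
//	executorAPI, err := NewExecutorAPI(config)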
func (a *APIClient) initializeAllocations(environmentID dto.EnvironmentID) {
	allocationStubs, err := a.listAllocations()
	if err != nil {
		log.WithError(err).Warn("Could not initialize allocations")
	} else {
		for _, stub := range allocationStubs {
			switch {
			case IsEnvironmentTemplateID(stub.JobID):
				continue
			case !strings.HasPrefix(stub.JobID, RunnerJobID(environmentID, "")):
				continue
			case stub.ClientStatus == structs.AllocClientStatusPending || stub.ClientStatus == structs.AllocClientStatusRunning:
				log.WithField("jobID", stub.JobID).WithField("status", stub.ClientStatus).Debug("Recovered Allocation")
				a.allocations.Add(stub.ID, &allocationData{
					allocClientStatus:  stub.ClientStatus,
					allocDesiredStatus: stub.DesiredStatus,
					jobID:              stub.JobID,
					start:              time.Unix(0, stub.CreateTime),
					allocNomadNode:     stub.NodeName,
				})
			}
		}
	}
}
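The two Execute stubs are the furthest from their Nomad counterparts. In Kubernetes, exec goes through the pod's exec subresource; below is a minimal sketch using client-go's remotecommand package (helper name and pod lookup are assumptions; StreamWithContext requires a reasonably recent client-go, and decoding the remote exit status from the returned error is omitted):

// Sketch only: executing a shell command in a runner pod.
package kubernetes

import (
	"context"
	"io"

	v1 "k8s.io/api/core/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/scheme"
	"k8s.io/client-go/rest"
	"k8s.io/client-go/tools/remotecommand"
)

// execInPod is a hypothetical helper mirroring the Execute/ExecuteCommand
// contract: it runs command in podName and streams stdin/stdout/stderr.
func execInPod(ctx context.Context, config *rest.Config, client *kubernetes.Clientset,
	namespace, podName, command string, tty bool,
	stdin io.Reader, stdout, stderr io.Writer) error {
	request := client.CoreV1().RESTClient().Post().
		Resource("pods").Namespace(namespace).Name(podName).
		SubResource("exec").
		VersionedParams(&v1.PodExecOptions{
			Container: TaskName,
			Command:   []string{"/bin/sh", "-c", command}, // shell form, as documented above
			Stdin:     stdin != nil,
			Stdout:    true,
			Stderr:    true,
			TTY:       tty,
		}, scheme.ParameterCodec)

	executor, err := remotecommand.NewSPDYExecutor(config, "POST", request.URL())
	if err != nil {
		return err
	}
	// StreamWithContext blocks until the command finishes; a non-nil error can
	// carry the remote exit status (not decoded in this sketch).
	return executor.StreamWithContext(ctx, remotecommand.StreamOptions{
		Stdin:  stdin,
		Stdout: stdout,
		Stderr: stderr,
		Tty:    tty,
	})
}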