added poseidon with aws to k8s changes
This commit is contained in:
251
internal/runner/k8s_runner.go
Normal file
251
internal/runner/k8s_runner.go
Normal file
@ -0,0 +1,251 @@
|
||||
package runner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/google/uuid"
|
||||
"github.com/openHPI/poseidon/pkg/dto"
|
||||
"github.com/openHPI/poseidon/pkg/monitoring"
|
||||
"github.com/openHPI/poseidon/pkg/storage"
|
||||
"io"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/client-go/kubernetes"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
var ErrPodCreationFailed = errors.New("failed to create pod")
|
||||
|
||||
var (
|
||||
ErrorUnknownExecution = errors.New("unknown execution")
|
||||
ErrFileNotFound = errors.New("file not found or insufficient permissions")
|
||||
ErrOOMKilled DestroyReason = errors.New("the runner was killed due to out of memory")
|
||||
ErrDestroyedByAPIRequest DestroyReason = errors.New("the client wants to stop the runner")
|
||||
)
|
||||
|
||||
// KubernetesPodWorkload is an abstraction to manage a Kubernetes pod.
|
||||
// It is not persisted on a Poseidon restart.
|
||||
// The InactivityTimer is used actively. It stops and deletes the pod.
|
||||
type KubernetesPodWorkload struct {
|
||||
InactivityTimer
|
||||
id string
|
||||
fs map[dto.FilePath][]byte
|
||||
executions storage.Storage[*dto.ExecutionRequest]
|
||||
runningExecutions map[string]context.CancelFunc
|
||||
onDestroy DestroyRunnerHandler
|
||||
environment ExecutionEnvironment
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
clientset *kubernetes.Clientset
|
||||
podName string
|
||||
namespace string
|
||||
}
|
||||
|
||||
// NewKubernetesPodWorkload creates a new KubernetesPodWorkload with the provided id.
|
||||
func NewKubernetesPodWorkload(
|
||||
environment ExecutionEnvironment, onDestroy DestroyRunnerHandler, clientset *kubernetes.Clientset) (*KubernetesPodWorkload, error) {
|
||||
newUUID, err := uuid.NewUUID()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed generating runner id: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
workload := &KubernetesPodWorkload{
|
||||
id: newUUID.String(),
|
||||
fs: make(map[dto.FilePath][]byte),
|
||||
runningExecutions: make(map[string]context.CancelFunc),
|
||||
onDestroy: onDestroy,
|
||||
environment: environment,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
clientset: clientset,
|
||||
namespace: "default", // You might want to make this configurable
|
||||
podName: fmt.Sprintf("workload-%s", newUUID.String()),
|
||||
}
|
||||
workload.executions = storage.NewMonitoredLocalStorage[*dto.ExecutionRequest](
|
||||
monitoring.MeasurementExecutionsK8s, monitorExecutionsRunnerID(environment.ID(), workload.id), time.Minute, ctx)
|
||||
workload.InactivityTimer = NewInactivityTimer(workload, func(_ Runner) error {
|
||||
return workload.Destroy(nil)
|
||||
})
|
||||
return workload, nil
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) ID() string {
|
||||
return w.id
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) Environment() dto.EnvironmentID {
|
||||
return w.environment.ID()
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) MappedPorts() []*dto.MappedPort {
|
||||
// Implement port mapping logic for Kubernetes
|
||||
return []*dto.MappedPort{}
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) StoreExecution(id string, request *dto.ExecutionRequest) {
|
||||
w.executions.Add(id, request)
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) ExecutionExists(id string) bool {
|
||||
_, ok := w.executions.Get(id)
|
||||
return ok
|
||||
}
|
||||
|
||||
// ExecuteInteractively runs the execution request in a Kubernetes pod.
|
||||
func (w *KubernetesPodWorkload) ExecuteInteractively(
|
||||
id string, _ io.ReadWriter, stdout, stderr io.Writer, ctx context.Context) (
|
||||
<-chan ExitInfo, context.CancelFunc, error) {
|
||||
w.ResetTimeout()
|
||||
request, ok := w.executions.Pop(id)
|
||||
if !ok {
|
||||
return nil, nil, ErrorUnknownExecution
|
||||
}
|
||||
hideEnvironmentVariablesK8s(request, "K8S")
|
||||
command, executionCtx, cancel := prepareExecution(request, w.ctx)
|
||||
exitInternal := make(chan ExitInfo)
|
||||
exit := make(chan ExitInfo, 1)
|
||||
|
||||
go w.executeCommand(executionCtx, command, stdout, stderr, exitInternal)
|
||||
go w.handleRunnerTimeout(executionCtx, exitInternal, exit, id)
|
||||
|
||||
return exit, cancel, nil
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) ListFileSystem(path string, recursive bool, writer io.Writer, humanReadable bool, ctx context.Context) error {
|
||||
// Implement file system listing for Kubernetes pods
|
||||
return dto.ErrNotSupported
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) UpdateFileSystem(request *dto.UpdateFileSystemRequest, ctx context.Context) error {
|
||||
// Implement file system update for Kubernetes pods
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) GetFileContent(path string, writer http.ResponseWriter, humanReadable bool, ctx context.Context) error {
|
||||
// Implement file content retrieval for Kubernetes pods
|
||||
return dto.ErrNotSupported
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) Destroy(_ DestroyReason) error {
|
||||
w.cancel()
|
||||
err := w.clientset.CoreV1().Pods(w.namespace).Delete(context.Background(), w.podName, metav1.DeleteOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("error while destroying kubernetes pod: %w", err)
|
||||
}
|
||||
if err := w.onDestroy(w); err != nil {
|
||||
return fmt.Errorf("error while destroying kubernetes runner: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) executeCommand(ctx context.Context, command string,
|
||||
stdout, stderr io.Writer, exit chan<- ExitInfo,
|
||||
) {
|
||||
defer close(exit)
|
||||
|
||||
pod := &corev1.Pod{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: w.podName,
|
||||
},
|
||||
Spec: corev1.PodSpec{
|
||||
RestartPolicy: corev1.RestartPolicyNever,
|
||||
Containers: []corev1.Container{
|
||||
{
|
||||
Name: "workload",
|
||||
Image: w.environment.Image(),
|
||||
Command: []string{"/bin/sh", "-c", command},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
_, err := w.clientset.CoreV1().Pods(w.namespace).Create(ctx, pod, metav1.CreateOptions{})
|
||||
if err != nil {
|
||||
exit <- ExitInfo{1, fmt.Errorf("%w: %v", ErrPodCreationFailed, err)}
|
||||
return
|
||||
}
|
||||
|
||||
req := w.clientset.CoreV1().Pods(w.namespace).GetLogs(w.podName, &corev1.PodLogOptions{
|
||||
Follow: true,
|
||||
})
|
||||
podLogs, err := req.Stream(ctx)
|
||||
if err != nil {
|
||||
exit <- ExitInfo{1, fmt.Errorf("error in opening stream: %v", err)}
|
||||
return
|
||||
}
|
||||
defer func(podLogs io.ReadCloser) {
|
||||
err := podLogs.Close()
|
||||
if err != nil {
|
||||
exit <- ExitInfo{1, fmt.Errorf("error in closing stream: %v", err)}
|
||||
}
|
||||
}(podLogs)
|
||||
|
||||
_, err = io.Copy(stdout, podLogs)
|
||||
if err != nil {
|
||||
exit <- ExitInfo{1, fmt.Errorf("error in copying logs: %v", err)}
|
||||
return
|
||||
}
|
||||
|
||||
// Wait for the pod to complete
|
||||
watch, err := w.clientset.CoreV1().Pods(w.namespace).Watch(ctx, metav1.ListOptions{
|
||||
FieldSelector: fmt.Sprintf("metadata.name=%s", w.podName),
|
||||
})
|
||||
if err != nil {
|
||||
exit <- ExitInfo{1, fmt.Errorf("error watching pod: %v", err)}
|
||||
return
|
||||
}
|
||||
defer watch.Stop()
|
||||
|
||||
for event := range watch.ResultChan() {
|
||||
pod, ok := event.Object.(*corev1.Pod)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
|
||||
exitCode := uint8(0)
|
||||
if pod.Status.Phase == corev1.PodFailed {
|
||||
exitCode = 1
|
||||
}
|
||||
exit <- ExitInfo{exitCode, nil}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *KubernetesPodWorkload) handleRunnerTimeout(ctx context.Context,
|
||||
exitInternal <-chan ExitInfo, exit chan<- ExitInfo, executionID string) {
|
||||
executionCtx, cancelExecution := context.WithCancel(ctx)
|
||||
w.runningExecutions[executionID] = cancelExecution
|
||||
defer delete(w.runningExecutions, executionID)
|
||||
defer close(exit)
|
||||
|
||||
select {
|
||||
case exitInfo := <-exitInternal:
|
||||
exit <- exitInfo
|
||||
case <-executionCtx.Done():
|
||||
exit <- ExitInfo{255, ErrorRunnerInactivityTimeout}
|
||||
}
|
||||
}
|
||||
|
||||
// hideEnvironmentVariables sets the CODEOCEAN variable and unsets all variables starting with the passed prefix.
|
||||
func hideEnvironmentVariablesK8s(request *dto.ExecutionRequest, unsetPrefix string) {
|
||||
if request.Environment == nil {
|
||||
request.Environment = make(map[string]string)
|
||||
}
|
||||
request.Command = "unset \"${!" + unsetPrefix + "@}\" && " + request.Command
|
||||
}
|
||||
|
||||
func prepareExecution(request *dto.ExecutionRequest, environmentCtx context.Context) (
|
||||
command string, ctx context.Context, cancel context.CancelFunc,
|
||||
) {
|
||||
command = request.FullCommand()
|
||||
if request.TimeLimit == 0 {
|
||||
ctx, cancel = context.WithCancel(environmentCtx)
|
||||
} else {
|
||||
ctx, cancel = context.WithTimeout(environmentCtx, time.Duration(request.TimeLimit)*time.Second)
|
||||
}
|
||||
return command, ctx, cancel
|
||||
}
|
Reference in New Issue
Block a user