package kubernetes

import (
	"context"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"

	nomadApi "github.com/hashicorp/nomad/api"
	"github.com/openHPI/poseidon/pkg/dto"
	appv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
)

const (
	TemplateJobPrefix     = "template"
	TaskGroupName         = "default-group"
	TaskName              = "default-task"
	TaskCount             = 1
	TaskDriver            = "docker"
	TaskCommand           = "sleep"
	ConfigTaskGroupName   = "config"
	ConfigTaskName        = "config"
	ConfigTaskDriver      = "exec"
	ConfigTaskCommand     = "true"
	ConfigMetaUsedKey     = "used"
	ConfigMetaUsedValue   = "true"
	ConfigMetaUnusedValue = "false"
	ConfigMetaTimeoutKey  = "timeout"
	ConfigMetaPoolSizeKey = "prewarmingPoolSize"
	TemplateJobNameParts  = 2
	RegisterTimeout       = 10 * time.Second
	RunnerTimeoutFallback = 60 * time.Second
)

var (
	ErrorInvalidJobID     = errors.New("invalid job id")
	ErrorMissingTaskGroup = errors.New("couldn't find config task group in job")
	TaskArgs              = []string{"infinity"}
)

// RegisterRunnerJob registers the passed deployment as a runner job and waits
// for the resulting evaluation to complete.
func (a *APIClient) RegisterRunnerJob(template *appv1.Deployment) error {
	evalID, err := a.apiQuerier.RegisterKubernetesDeployment(*template)
	if err != nil {
		return fmt.Errorf("couldn't register runner job: %w", err)
	}

	registerTimeout, cancel := context.WithTimeout(context.Background(), RegisterTimeout)
	defer cancel()

	return a.MonitorEvaluation(evalID, registerTimeout)
}

// SetForcePullFlag sets the image pull policy of the runner container so that
// the image is pulled again if value is true.
func SetForcePullFlag(deployment *appv1.Deployment, value bool) {
	containers := deployment.Spec.Template.Spec.Containers
	for i := range containers {
		if containers[i].Name != TaskName {
			continue
		}

		if value {
			containers[i].ImagePullPolicy = v1.PullAlways
		} else {
			containers[i].ImagePullPolicy = v1.PullIfNotPresent
		}
	}
}

// IsEnvironmentTemplateID checks if the passed job id belongs to a template job.
func IsEnvironmentTemplateID(jobID string) bool {
	parts := strings.Split(jobID, "-")
	if len(parts) != TemplateJobNameParts || parts[0] != TemplateJobPrefix {
		return false
	}

	_, err := EnvironmentIDFromTemplateJobID(jobID)
	return err == nil
}

// RunnerJobID returns the job id of the runner with the given environmentID and id.
func RunnerJobID(environmentID dto.EnvironmentID, id string) string {
	return fmt.Sprintf("%d-%s", environmentID, id)
}

// TemplateJobID returns the id of the template job for the environment with the given id.
func TemplateJobID(id dto.EnvironmentID) string {
	return fmt.Sprintf("%s-%d", TemplateJobPrefix, id)
}

// EnvironmentIDFromRunnerID returns the environment id that is part of the passed runner job id.
func EnvironmentIDFromRunnerID(jobID string) (dto.EnvironmentID, error) {
	return partOfJobID(jobID, 0)
}

// EnvironmentIDFromTemplateJobID returns the environment id that is part of the passed template job id.
func EnvironmentIDFromTemplateJobID(id string) (dto.EnvironmentID, error) {
	return partOfJobID(id, 1)
}

// partOfJobID parses the job id part at the given index as an environment id.
func partOfJobID(id string, part uint) (dto.EnvironmentID, error) {
	parts := strings.Split(id, "-")
	if int(part) >= len(parts) {
		return 0, fmt.Errorf("job id %q is missing part %d: %w", id, part, ErrorInvalidJobID)
	}

	environmentID, err := strconv.Atoi(parts[part])
	if err != nil {
		return 0, fmt.Errorf("invalid environment id part %v: %w", err, ErrorInvalidJobID)
	}

	return dto.EnvironmentID(environmentID), nil
}

// isOOMKilled reports whether the allocation's task was killed because it ran out
// of memory, by comparing the number of oom_killed events in the task's event log
// against its restart count.
func isOOMKilled(alloc *nomadApi.Allocation) bool {
	state, ok := alloc.TaskStates[TaskName]
	if !ok {
		return false
	}

	var oomKilledCount uint64
	for _, event := range state.Events {
		if oomString, ok := event.Details["oom_killed"]; ok {
			if oom, err := strconv.ParseBool(oomString); err == nil && oom {
				oomKilledCount++
			}
		}
	}

	return oomKilledCount >= state.Restarts
}
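
// The snippet below is an illustrative sketch, not part of the package API: it
// shows how the job-id helpers above are expected to compose. The runner id
// "f6cbc0bd" and the environment id 0 are hypothetical values chosen for the
// example.
//
//	runnerJobID := RunnerJobID(dto.EnvironmentID(0), "f6cbc0bd") // "0-f6cbc0bd"
//	envID, _ := EnvironmentIDFromRunnerID(runnerJobID)           // 0
//	templateJobID := TemplateJobID(envID)                        // "template-0"
//	_ = IsEnvironmentTemplateID(templateJobID)                   // true
//	_ = IsEnvironmentTemplateID(runnerJobID)                     // false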