Files
poseidon/internal/runner/runner.go
sirkrypt0 36dc99f019 Don't embed the execution.Storer interface into a runner
Previously, the execution.Storer interface was embedded in the Runner interface.
However, this resulted in calls like runner.Add(...) to add an execution to the
store which is kind of ugly. Thus, we decided to add only the required functions to
the runner interface and make the execution.Storer a field of the implementation.
2021-07-30 14:28:06 +02:00

327 lines
9.8 KiB
Go

package runner
import (
"archive/tar"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
nomadApi "github.com/hashicorp/nomad/api"
"gitlab.hpi.de/codeocean/codemoon/poseidon/internal/nomad"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/dto"
"gitlab.hpi.de/codeocean/codemoon/poseidon/pkg/execution"
"io"
"strings"
"time"
)
// ContextKey is the type for keys in a request context.
type ContextKey string
const (
// runnerContextKey is the key used to store runners in context.Context.
runnerContextKey ContextKey = "runner"
// SIGQUIT is the character that causes a tty to send the SIGQUIT signal to the controlled process.
SIGQUIT = 0x1c
// executionTimeoutGracePeriod is the time to wait after sending a SIGQUIT signal to a timed out execution.
// If the execution does not return after this grace period, the runner is destroyed.
executionTimeoutGracePeriod = 3 * time.Second
)
var (
ErrorUnknownExecution = errors.New("unknown execution")
ErrorFileCopyFailed = errors.New("file copy failed")
)
type ExitInfo struct {
Code uint8
Err error
}
type Runner interface {
InactivityTimer
// ID returns the id of the runner.
ID() string
// MappedPorts returns the mapped ports of the runner.
MappedPorts() []*dto.MappedPort
// StoreExecution adds a new execution to the runner that can then be executed using ExecuteInteractively.
StoreExecution(id string, executionRequest *dto.ExecutionRequest)
// ExecutionExists returns whether the execution with the given id is already stored.
ExecutionExists(id string) bool
// ExecuteInteractively runs the given execution request and forwards from and to the given reader and writers.
// An ExitInfo is sent to the exit channel on command completion.
// Output from the runner is forwarded immediately.
ExecuteInteractively(
id string,
stdin io.ReadWriter,
stdout,
stderr io.Writer,
) (exit <-chan ExitInfo, cancel context.CancelFunc, err error)
// UpdateFileSystem processes a dto.UpdateFileSystemRequest by first deleting each given dto.FilePath recursively
// and then copying each given dto.File to the runner.
UpdateFileSystem(request *dto.UpdateFileSystemRequest) error
// Destroy destroys the Runner in Nomad.
Destroy() error
}
// NomadJob is an abstraction to communicate with Nomad environments.
type NomadJob struct {
InactivityTimer
executions execution.Storer
id string
portMappings []nomadApi.PortMapping
api nomad.ExecutorAPI
manager Manager
}
// NewNomadJob creates a new NomadJob with the provided id.
func NewNomadJob(id string, portMappings []nomadApi.PortMapping,
apiClient nomad.ExecutorAPI, manager Manager,
) *NomadJob {
job := &NomadJob{
id: id,
portMappings: portMappings,
api: apiClient,
executions: execution.NewLocalStorage(),
manager: manager,
}
job.InactivityTimer = NewInactivityTimer(job, manager)
return job
}
func (r *NomadJob) ID() string {
return r.id
}
func (r *NomadJob) MappedPorts() []*dto.MappedPort {
ports := make([]*dto.MappedPort, 0, len(r.portMappings))
for _, portMapping := range r.portMappings {
ports = append(ports, &dto.MappedPort{
ExposedPort: uint(portMapping.To),
HostAddress: fmt.Sprintf("%s:%d", portMapping.HostIP, portMapping.Value),
})
}
return ports
}
func (r *NomadJob) StoreExecution(id string, request *dto.ExecutionRequest) {
r.executions.Add(execution.ID(id), request)
}
func (r *NomadJob) ExecutionExists(id string) bool {
return r.executions.Exists(execution.ID(id))
}
func (r *NomadJob) ExecuteInteractively(
id string,
stdin io.ReadWriter,
stdout, stderr io.Writer,
) (<-chan ExitInfo, context.CancelFunc, error) {
request, ok := r.executions.Pop(execution.ID(id))
if !ok {
return nil, nil, ErrorUnknownExecution
}
r.ResetTimeout()
command, ctx, cancel := prepareExecution(request)
exitInternal := make(chan ExitInfo)
exit := make(chan ExitInfo, 1)
ctxExecute, cancelExecute := context.WithCancel(context.Background())
go r.executeCommand(ctxExecute, command, stdin, stdout, stderr, exitInternal)
go r.handleExitOrContextDone(ctx, cancelExecute, exitInternal, exit, stdin)
return exit, cancel, nil
}
func (r *NomadJob) UpdateFileSystem(copyRequest *dto.UpdateFileSystemRequest) error {
r.ResetTimeout()
var tarBuffer bytes.Buffer
if err := createTarArchiveForFiles(copyRequest.Copy, &tarBuffer); err != nil {
return err
}
fileDeletionCommand := fileDeletionCommand(copyRequest.Delete)
copyCommand := "tar --extract --absolute-names --verbose --file=/dev/stdin;"
updateFileCommand := (&dto.ExecutionRequest{Command: fileDeletionCommand + copyCommand}).FullCommand()
stdOut := bytes.Buffer{}
stdErr := bytes.Buffer{}
exitCode, err := r.api.ExecuteCommand(r.id, context.Background(), updateFileCommand, false,
&tarBuffer, &stdOut, &stdErr)
if err != nil {
return fmt.Errorf(
"%w: nomad error during file copy: %v",
nomad.ErrorExecutorCommunicationFailed,
err)
}
if exitCode != 0 {
return fmt.Errorf(
"%w: stderr output '%s' and stdout output '%s'",
ErrorFileCopyFailed,
stdErr.String(),
stdOut.String())
}
return nil
}
func (r *NomadJob) Destroy() error {
if err := r.manager.Return(r); err != nil {
return fmt.Errorf("error while destroying runner: %w", err)
}
return nil
}
func prepareExecution(request *dto.ExecutionRequest) (
command []string, ctx context.Context, cancel context.CancelFunc,
) {
command = request.FullCommand()
if request.TimeLimit == 0 {
ctx, cancel = context.WithCancel(context.Background())
} else {
ctx, cancel = context.WithTimeout(context.Background(), time.Duration(request.TimeLimit)*time.Second)
}
return command, ctx, cancel
}
func (r *NomadJob) executeCommand(ctx context.Context, command []string,
stdin io.ReadWriter, stdout, stderr io.Writer, exit chan<- ExitInfo,
) {
exitCode, err := r.api.ExecuteCommand(r.id, ctx, command, true, stdin, stdout, stderr)
if err == nil && r.TimeoutPassed() {
err = ErrorRunnerInactivityTimeout
}
exit <- ExitInfo{uint8(exitCode), err}
}
func (r *NomadJob) handleExitOrContextDone(ctx context.Context, cancelExecute context.CancelFunc,
exitInternal <-chan ExitInfo, exit chan<- ExitInfo, stdin io.ReadWriter,
) {
defer cancelExecute()
select {
case exitInfo := <-exitInternal:
exit <- exitInfo
close(exit)
return
case <-ctx.Done():
// From this time on until the WebSocket connection to the client is closed in /internal/api/websocket.go
// waitForExit, output can still be forwarded to the client. We accept this race condition because adding
// a locking mechanism would complicate the interfaces used (currently io.Writer).
exit <- ExitInfo{255, ctx.Err()}
close(exit)
}
// This injects the SIGQUIT character into the stdin. This character is parsed by the tty line discipline
// (tty has to be true) and converted to a SIGQUIT signal sent to the foreground process attached to the tty.
// By default, SIGQUIT causes the process to terminate and produces a core dump. Processes can catch this signal
// and ignore it, which is why we destroy the runner if the process does not terminate after a grace period.
n, err := stdin.Write([]byte{SIGQUIT})
if n != 1 {
log.WithField("runner", r.id).Warn("Could not send SIGQUIT because nothing was written")
}
if err != nil {
log.WithField("runner", r.id).WithError(err).Warn("Could not send SIGQUIT due to error")
}
select {
case <-exitInternal:
log.WithField("runner", r.id).Debug("Execution terminated after SIGQUIT")
case <-time.After(executionTimeoutGracePeriod):
log.WithField("runner", r.id).Info("Execution did not quit after SIGQUIT")
if err := r.Destroy(); err != nil {
log.WithField("runner", r.id).Error("Error when destroying runner")
}
}
}
func createTarArchiveForFiles(filesToCopy []dto.File, w io.Writer) error {
tarWriter := tar.NewWriter(w)
for _, file := range filesToCopy {
if err := tarWriter.WriteHeader(tarHeader(file)); err != nil {
err := fmt.Errorf("error writing tar file header: %w", err)
log.
WithField("file", file).
Error(err)
return err
}
if _, err := tarWriter.Write(file.ByteContent()); err != nil {
err := fmt.Errorf("error writing tar file content: %w", err)
log.
WithField("file", file).
Error(err)
return err
}
}
if err := tarWriter.Close(); err != nil {
return fmt.Errorf("error closing tar writer: %w", err)
}
return nil
}
func fileDeletionCommand(pathsToDelete []dto.FilePath) string {
if len(pathsToDelete) == 0 {
return ""
}
command := "rm --recursive --force "
for _, filePath := range pathsToDelete {
// To avoid command injection, filenames need to be quoted.
// See https://unix.stackexchange.com/questions/347332/what-characters-need-to-be-escaped-in-files-without-quotes
// for details.
singleQuoteEscapedFileName := strings.ReplaceAll(filePath.Cleaned(), "'", "'\\''")
command += fmt.Sprintf("'%s' ", singleQuoteEscapedFileName)
}
command += ";"
return command
}
func tarHeader(file dto.File) *tar.Header {
if file.IsDirectory() {
return &tar.Header{
Typeflag: tar.TypeDir,
Name: file.CleanedPath(),
Mode: 0755,
}
} else {
return &tar.Header{
Typeflag: tar.TypeReg,
Name: file.CleanedPath(),
Mode: 0744,
Size: int64(len(file.Content)),
}
}
}
// MarshalJSON implements json.Marshaler interface.
// This exports private attributes like the id too.
func (r *NomadJob) MarshalJSON() ([]byte, error) {
res, err := json.Marshal(struct {
ID string `json:"runnerId"`
}{
ID: r.ID(),
})
if err != nil {
return nil, fmt.Errorf("error marshaling Nomad job: %w", err)
}
return res, nil
}
// NewContext creates a context containing a runner.
func NewContext(ctx context.Context, runner Runner) context.Context {
return context.WithValue(ctx, runnerContextKey, runner)
}
// FromContext returns a runner from a context.
func FromContext(ctx context.Context) (Runner, bool) {
runner, ok := ctx.Value(runnerContextKey).(Runner)
return runner, ok
}