Configure Systemd Socket Activation

as new way for Poseidon to accept connections. This should reduce our issues caused by deployments.
This commit is contained in:
Maximilian Paß
2023-12-02 16:56:43 +01:00
parent eaa022282c
commit eaddc65989
11 changed files with 128 additions and 58 deletions

View File

@ -208,9 +208,20 @@ jobs:
./poseidon | tee poseidon.log & ./poseidon | tee poseidon.log &
until curl -s --fail http://localhost:7200/api/v1/health ; do sleep 1; done until curl -s --fail http://localhost:7200/api/v1/health ; do sleep 1; done
make e2e-test make e2e-test
- name: Run e2e recovery tests - name: Setup Poseidon Socket
run: | run: |
killall poseidon killall poseidon
mkdir -p ~/.config/systemd/user
cp ./.github/workflows/resources/poseidon-minimal.socket ~/.config/systemd/user/poseidon.socket
cat ./.github/workflows/resources/poseidon-minimal.service | envsubst > ~/.config/systemd/user/poseidon.service
systemctl --user daemon-reload
systemctl --user start poseidon.socket
- name: Print Poseidon Failure logs
if: failure()
run: journalctl -xen --no-pager
- name: Run e2e recovery tests
run: |
tail -f /var/log/syslog &
make e2e-test-recovery make e2e-test-recovery
if: ${{ success() || failure() }} if: ${{ success() || failure() }}
- name: Convert coverage reports - name: Convert coverage reports

View File

@ -0,0 +1,12 @@
# This is a minimal service definition to make use of the systemd socket activation in the e2e tests.
# With Systemd socket activation, systemd sets up a listening socket on behalf of a service.
# This is useful for zero downtime deployments as the systemd sockets hold up the connections while the service is restarting.
[Unit]
Requires=poseidon.socket
[Service]
WorkingDirectory=${GITHUB_WORKSPACE}
ExecStart=${GITHUB_WORKSPACE}/poseidon
Restart=always
Environment="POSEIDON_SERVER_SYSTEMDSOCKETACTIVATION=TRUE"

View File

@ -0,0 +1,4 @@
# This is a minimal socket definition to provide a systemd socket for the Poseidon e2e tests.
[Socket]
ListenStream = 7200

View File

@ -121,6 +121,11 @@ run-with-coverage: build-cover ## Run binary and capture code coverage (during e
@mkdir -p $(GOCOVERDIR) @mkdir -p $(GOCOVERDIR)
@GOCOVERDIR=$(GOCOVERDIR) ./$(PROJECT_NAME) @GOCOVERDIR=$(GOCOVERDIR) ./$(PROJECT_NAME)
## This target uses `systemd-socket-activate` (only Linux) to create a systemd socket and makes it accessible to a new Poseidon execution.
.PHONY: run-with-socket
run-with-socket: build
@systemd-socket-activate -l 7200 ./$(PROJECT_NAME)
.PHONY: convert-run-coverage .PHONY: convert-run-coverage
convert-run-coverage: ## Convert coverage data (created by `run-with-coverage`) to legacy text format convert-run-coverage: ## Convert coverage data (created by `run-with-coverage`) to legacy text format
@go tool covdata textfmt -i $(GOCOVERDIR) -o $(GOCOVERDIR)/coverage_run.cov @go tool covdata textfmt -i $(GOCOVERDIR) -o $(GOCOVERDIR)/coverage_run.cov
@ -138,7 +143,7 @@ e2e-test: deps ## Run e2e tests
.PHONY: e2e-test-recovery .PHONY: e2e-test-recovery
e2e-test-recovery: deps ## Run recovery e2e tests e2e-test-recovery: deps ## Run recovery e2e tests
@go test -count=1 ./tests/recovery -v -args -poseidonPath="../../poseidon" -dockerImage="$(E2E_TEST_DOCKER_IMAGE)" @go test -count=1 ./tests/recovery -v -args -dockerImage="$(E2E_TEST_DOCKER_IMAGE)"
.PHONY: e2e-docker .PHONY: e2e-docker
e2e-docker: docker ## Run e2e tests against the Docker container e2e-docker: docker ## Run e2e tests against the Docker container

View File

@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"github.com/coreos/go-systemd/v22/activation"
"github.com/getsentry/sentry-go" "github.com/getsentry/sentry-go"
sentryhttp "github.com/getsentry/sentry-go/http" sentryhttp "github.com/getsentry/sentry-go/http"
"github.com/openHPI/poseidon/internal/api" "github.com/openHPI/poseidon/internal/api"
@ -14,6 +15,7 @@ import (
"github.com/openHPI/poseidon/pkg/logging" "github.com/openHPI/poseidon/pkg/logging"
"github.com/openHPI/poseidon/pkg/monitoring" "github.com/openHPI/poseidon/pkg/monitoring"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
"net"
"net/http" "net/http"
"os" "os"
"os/signal" "os/signal"
@ -21,6 +23,7 @@ import (
"runtime/debug" "runtime/debug"
"runtime/pprof" "runtime/pprof"
"strconv" "strconv"
"sync"
"time" "time"
) )
@ -156,24 +159,58 @@ func runServer(server *http.Server, cancel context.CancelFunc) {
defer cancel() defer cancel()
defer shutdownSentry() // shutdownSentry must be executed in the main goroutine. defer shutdownSentry() // shutdownSentry must be executed in the main goroutine.
log.WithField("address", server.Addr).Info("Starting server") httpListeners := getHTTPListeners(server)
serveHTTPListeners(server, httpListeners)
}
func getHTTPListeners(server *http.Server) (httpListeners []net.Listener) {
var err error
if config.Config.Server.SystemdSocketActivation {
httpListeners, err = activation.Listeners()
} else {
var httpListener net.Listener
httpListener, err = net.Listen("tcp", server.Addr)
httpListeners = append(httpListeners, httpListener)
}
if err != nil || httpListeners == nil || len(httpListeners) == 0 {
log.WithError(err).
WithField("listeners", httpListeners).
WithField("systemd_socket", config.Config.Server.SystemdSocketActivation).
Fatal("Failed listening to any socket")
return nil
}
return httpListeners
}
func serveHTTPListeners(server *http.Server, httpListeners []net.Listener) {
var wg sync.WaitGroup
wg.Add(len(httpListeners))
for _, l := range httpListeners {
go func(listener net.Listener) {
defer wg.Done()
log.WithField("address", listener.Addr()).Info("Serving Listener")
serveHTTPListener(server, listener)
}(l)
}
wg.Wait()
}
func serveHTTPListener(server *http.Server, l net.Listener) {
var err error var err error
if config.Config.Server.TLS.Active { if config.Config.Server.TLS.Active {
server.TLSConfig = config.TLSConfig server.TLSConfig = config.TLSConfig
log. log.WithField("CertFile", config.Config.Server.TLS.CertFile).
WithField("CertFile", config.Config.Server.TLS.CertFile).
WithField("KeyFile", config.Config.Server.TLS.KeyFile). WithField("KeyFile", config.Config.Server.TLS.KeyFile).
Debug("Using TLS") Debug("Using TLS")
err = server.ListenAndServeTLS(config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile) err = server.ServeTLS(l, config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile)
} else { } else {
err = server.ListenAndServe() err = server.Serve(l)
} }
if err != nil {
if errors.Is(err, http.ErrServerClosed) { if errors.Is(err, http.ErrServerClosed) {
log.WithError(err).Info("Server closed") log.WithError(err).WithField("listener", l).Info("Server closed")
} else { } else {
log.WithError(err).Fatal("Error during listening and serving") log.WithError(err).WithField("listener", l).Error("Error during listening and serving")
}
} }
} }

View File

@ -5,6 +5,10 @@ server:
address: 127.0.0.1 address: 127.0.0.1
# Port on which the webserver listens # Port on which the webserver listens
port: 7200 port: 7200
# When using Systemd socket activation, Poseidon tries to connect to an existing systemd socket instead of creating its own.
# This is useful for zero downtime deployments as the systemd sockets hold up the connections while Poseidon is restarting.
# Iff systemdsocketactivation, the configured address and port will not be used, instead the provided systemd socket will be.
systemdsocketactivation: false
# If set, this token is required in the `Poseidon-Token` header for each route except /health # If set, this token is required in the `Poseidon-Token` header for each route except /health
# token: SECRET # token: SECRET
# Configuration of TLS between the web client and Poseidon. # Configuration of TLS between the web client and Poseidon.

3
go.mod
View File

@ -3,6 +3,7 @@ module github.com/openHPI/poseidon
go 1.21.1 go 1.21.1
require ( require (
github.com/coreos/go-systemd/v22 v22.5.0
github.com/getsentry/sentry-go v0.26.0 github.com/getsentry/sentry-go v0.26.0
github.com/google/uuid v1.5.0 github.com/google/uuid v1.5.0
github.com/gorilla/mux v1.8.1 github.com/gorilla/mux v1.8.1
@ -11,6 +12,7 @@ require (
github.com/hashicorp/nomad/api v0.0.0-20230922145839-20eadc7b296c github.com/hashicorp/nomad/api v0.0.0-20230922145839-20eadc7b296c
github.com/influxdata/influxdb-client-go/v2 v2.13.0 github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/mitchellh/mapstructure v1.5.0 github.com/mitchellh/mapstructure v1.5.0
github.com/shirou/gopsutil/v3 v3.23.9
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.8.4 github.com/stretchr/testify v1.8.4
golang.org/x/sys v0.16.0 golang.org/x/sys v0.16.0
@ -85,7 +87,6 @@ require (
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/ryanuber/go-glob v1.0.0 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
github.com/shirou/gopsutil/v3 v3.23.9 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/stretchr/objx v0.5.1 // indirect github.com/stretchr/objx v0.5.1 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect

3
go.sum
View File

@ -46,6 +46,8 @@ github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4r
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
@ -74,6 +76,7 @@ github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68= github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI= github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI=
github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY= github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY=

View File

@ -21,9 +21,10 @@ import (
var ( var (
Config = &configuration{ Config = &configuration{
Server: server{ Server: server{
Address: "127.0.0.1", Address: "127.0.0.1",
Port: 7200, Port: 7200,
Token: "", SystemdSocketActivation: false,
Token: "",
TLS: TLS{ TLS: TLS{
Active: false, Active: false,
CertFile: "", CertFile: "",
@ -90,13 +91,14 @@ type alert struct {
// server configures the Poseidon webserver. // server configures the Poseidon webserver.
type server struct { type server struct {
Address string Address string
Port int Port int
Token string SystemdSocketActivation bool
TLS TLS Token string
InteractiveStderr bool TLS TLS
TemplateJobFile string InteractiveStderr bool
Alert alert TemplateJobFile string
Alert alert
} }
// URL returns the URL of the Poseidon webserver. // URL returns the URL of the Poseidon webserver.

View File

@ -1,7 +1,6 @@
package recovery package recovery
import ( import (
"context"
"encoding/json" "encoding/json"
"flag" "flag"
nomadApi "github.com/hashicorp/nomad/api" nomadApi "github.com/hashicorp/nomad/api"
@ -29,7 +28,6 @@ import (
var ( var (
log = logging.GetLogger("e2e-recovery") log = logging.GetLogger("e2e-recovery")
testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests") testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests")
poseidonBinary = flag.String("poseidonPath", "", "The path to the Poseidon binary")
nomadClient *nomadApi.Client nomadClient *nomadApi.Client
nomadNamespace string nomadNamespace string
) )
@ -42,8 +40,7 @@ const (
type E2ERecoveryTestSuite struct { type E2ERecoveryTestSuite struct {
suite.Suite suite.Suite
runnerID string runnerID string
poseidonCancel context.CancelFunc
} }
// Overwrite TestMain for custom setup. // Overwrite TestMain for custom setup.
@ -51,10 +48,6 @@ func TestMain(m *testing.M) {
if err := config.InitConfig(); err != nil { if err := config.InitConfig(); err != nil {
log.WithError(err).Fatal("Could not initialize configuration") log.WithError(err).Fatal("Could not initialize configuration")
} }
if *poseidonBinary == "" {
log.Fatal("You must specify the -path to the Poseidon binary!")
}
if *testDockerImage == "" { if *testDockerImage == "" {
log.Fatal("You must specify the -dockerImage flag!") log.Fatal("You must specify the -dockerImage flag!")
} }
@ -77,20 +70,12 @@ func TestMain(m *testing.M) {
func TestE2ERecoveryTests(t *testing.T) { func TestE2ERecoveryTests(t *testing.T) {
testSuite := new(E2ERecoveryTestSuite) testSuite := new(E2ERecoveryTestSuite)
ctx, cancelPoseidon := context.WithCancel(context.Background())
testSuite.poseidonCancel = cancelPoseidon
startPoseidon(ctx, cancelPoseidon)
waitForPoseidon()
e2e.CreateDefaultEnvironment(PrewarmingPoolSize, *testDockerImage) e2e.CreateDefaultEnvironment(PrewarmingPoolSize, *testDockerImage)
e2e.WaitForDefaultEnvironment() e2e.WaitForDefaultEnvironment()
suite.Run(t, testSuite) suite.Run(t, testSuite)
TearDown() TearDown()
testSuite.poseidonCancel()
<-time.After(tests.ShortTimeout)
} }
func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() { func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() {
@ -99,6 +84,7 @@ func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() {
} }
func (s *E2ERecoveryTestSuite) TestInactivityTimer_Expired() { func (s *E2ERecoveryTestSuite) TestInactivityTimer_Expired() {
waitForPoseidon() // The timeout begins only when the runner is recovered.
<-time.After(InactivityTimeout * time.Second) <-time.After(InactivityTimeout * time.Second)
_, err := e2e.ProvideWebSocketURL(s.runnerID, &dto.ExecutionRequest{Command: "true"}) _, err := e2e.ProvideWebSocketURL(s.runnerID, &dto.ExecutionRequest{Command: "true"})
s.Error(err) s.Error(err)

View File

@ -1,15 +1,13 @@
package recovery package recovery
import ( import (
"context"
"github.com/openHPI/poseidon/internal/api" "github.com/openHPI/poseidon/internal/api"
"github.com/openHPI/poseidon/pkg/dto" "github.com/openHPI/poseidon/pkg/dto"
"github.com/openHPI/poseidon/tests" "github.com/openHPI/poseidon/tests"
"github.com/openHPI/poseidon/tests/e2e" "github.com/openHPI/poseidon/tests/e2e"
"github.com/openHPI/poseidon/tests/helpers" "github.com/openHPI/poseidon/tests/helpers"
"github.com/shirou/gopsutil/v3/process"
"net/http" "net/http"
"os"
"os/exec"
"time" "time"
) )
@ -27,12 +25,8 @@ func (s *E2ERecoveryTestSuite) SetupTest() {
} }
<-time.After(tests.ShortTimeout) <-time.After(tests.ShortTimeout)
s.poseidonCancel() killPoseidon()
<-time.After(tests.ShortTimeout) <-time.After(tests.ShortTimeout)
ctx, cancelPoseidon := context.WithCancel(context.Background())
s.poseidonCancel = cancelPoseidon
startPoseidon(ctx, cancelPoseidon)
waitForPoseidon()
} }
func TearDown() { func TearDown() {
@ -43,16 +37,6 @@ func TearDown() {
} }
} }
func startPoseidon(ctx context.Context, cancelPoseidon context.CancelFunc) {
poseidon := exec.CommandContext(ctx, *poseidonBinary) //nolint:gosec // We accept that another binary can be executed.
poseidon.Stdout = os.Stdout
poseidon.Stderr = os.Stderr
if err := poseidon.Start(); err != nil {
cancelPoseidon()
log.WithError(err).Fatal("Failed to start Poseidon")
}
}
func waitForPoseidon() { func waitForPoseidon() {
done := false done := false
for !done { for !done {
@ -61,3 +45,24 @@ func waitForPoseidon() {
done = err == nil && resp.StatusCode == http.StatusNoContent done = err == nil && resp.StatusCode == http.StatusNoContent
} }
} }
func killPoseidon() {
processes, err := process.Processes()
if err != nil {
log.WithError(err).Error("Error listing processes")
}
for _, p := range processes {
n, err := p.Name()
if err != nil {
continue
}
if n == "poseidon" {
err = p.Kill()
if err != nil {
log.WithError(err).Error("Error killing Poseidon")
} else {
log.Info("Killed Poseidon")
}
}
}
}