Configure Systemd Socket Activation

as new way for Poseidon to accept connections. This should reduce our issues caused by deployments.
This commit is contained in:
Maximilian Paß
2023-12-02 16:56:43 +01:00
parent eaa022282c
commit eaddc65989
11 changed files with 128 additions and 58 deletions

View File

@ -208,9 +208,20 @@ jobs:
./poseidon | tee poseidon.log &
until curl -s --fail http://localhost:7200/api/v1/health ; do sleep 1; done
make e2e-test
- name: Run e2e recovery tests
- name: Setup Poseidon Socket
run: |
killall poseidon
mkdir -p ~/.config/systemd/user
cp ./.github/workflows/resources/poseidon-minimal.socket ~/.config/systemd/user/poseidon.socket
cat ./.github/workflows/resources/poseidon-minimal.service | envsubst > ~/.config/systemd/user/poseidon.service
systemctl --user daemon-reload
systemctl --user start poseidon.socket
- name: Print Poseidon Failure logs
if: failure()
run: journalctl -xen --no-pager
- name: Run e2e recovery tests
run: |
tail -f /var/log/syslog &
make e2e-test-recovery
if: ${{ success() || failure() }}
- name: Convert coverage reports

View File

@ -0,0 +1,12 @@
# This is a minimal service definition to make use of the systemd socket activation in the e2e tests.
# With Systemd socket activation, systemd sets up a listening socket on behalf of a service.
# This is useful for zero downtime deployments as the systemd sockets hold up the connections while the service is restarting.
[Unit]
Requires=poseidon.socket
[Service]
WorkingDirectory=${GITHUB_WORKSPACE}
ExecStart=${GITHUB_WORKSPACE}/poseidon
Restart=always
Environment="POSEIDON_SERVER_SYSTEMDSOCKETACTIVATION=TRUE"

View File

@ -0,0 +1,4 @@
# This is a minimal socket definition to provide a systemd socket for the Poseidon e2e tests.
[Socket]
ListenStream = 7200

View File

@ -121,6 +121,11 @@ run-with-coverage: build-cover ## Run binary and capture code coverage (during e
@mkdir -p $(GOCOVERDIR)
@GOCOVERDIR=$(GOCOVERDIR) ./$(PROJECT_NAME)
## This target uses `systemd-socket-activate` (only Linux) to create a systemd socket and makes it accessible to a new Poseidon execution.
.PHONY: run-with-socket
run-with-socket: build
@systemd-socket-activate -l 7200 ./$(PROJECT_NAME)
.PHONY: convert-run-coverage
convert-run-coverage: ## Convert coverage data (created by `run-with-coverage`) to legacy text format
@go tool covdata textfmt -i $(GOCOVERDIR) -o $(GOCOVERDIR)/coverage_run.cov
@ -138,7 +143,7 @@ e2e-test: deps ## Run e2e tests
.PHONY: e2e-test-recovery
e2e-test-recovery: deps ## Run recovery e2e tests
@go test -count=1 ./tests/recovery -v -args -poseidonPath="../../poseidon" -dockerImage="$(E2E_TEST_DOCKER_IMAGE)"
@go test -count=1 ./tests/recovery -v -args -dockerImage="$(E2E_TEST_DOCKER_IMAGE)"
.PHONY: e2e-docker
e2e-docker: docker ## Run e2e tests against the Docker container

View File

@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"github.com/coreos/go-systemd/v22/activation"
"github.com/getsentry/sentry-go"
sentryhttp "github.com/getsentry/sentry-go/http"
"github.com/openHPI/poseidon/internal/api"
@ -14,6 +15,7 @@ import (
"github.com/openHPI/poseidon/pkg/logging"
"github.com/openHPI/poseidon/pkg/monitoring"
"golang.org/x/sys/unix"
"net"
"net/http"
"os"
"os/signal"
@ -21,6 +23,7 @@ import (
"runtime/debug"
"runtime/pprof"
"strconv"
"sync"
"time"
)
@ -156,24 +159,58 @@ func runServer(server *http.Server, cancel context.CancelFunc) {
defer cancel()
defer shutdownSentry() // shutdownSentry must be executed in the main goroutine.
log.WithField("address", server.Addr).Info("Starting server")
httpListeners := getHTTPListeners(server)
serveHTTPListeners(server, httpListeners)
}
func getHTTPListeners(server *http.Server) (httpListeners []net.Listener) {
var err error
if config.Config.Server.SystemdSocketActivation {
httpListeners, err = activation.Listeners()
} else {
var httpListener net.Listener
httpListener, err = net.Listen("tcp", server.Addr)
httpListeners = append(httpListeners, httpListener)
}
if err != nil || httpListeners == nil || len(httpListeners) == 0 {
log.WithError(err).
WithField("listeners", httpListeners).
WithField("systemd_socket", config.Config.Server.SystemdSocketActivation).
Fatal("Failed listening to any socket")
return nil
}
return httpListeners
}
func serveHTTPListeners(server *http.Server, httpListeners []net.Listener) {
var wg sync.WaitGroup
wg.Add(len(httpListeners))
for _, l := range httpListeners {
go func(listener net.Listener) {
defer wg.Done()
log.WithField("address", listener.Addr()).Info("Serving Listener")
serveHTTPListener(server, listener)
}(l)
}
wg.Wait()
}
func serveHTTPListener(server *http.Server, l net.Listener) {
var err error
if config.Config.Server.TLS.Active {
server.TLSConfig = config.TLSConfig
log.
WithField("CertFile", config.Config.Server.TLS.CertFile).
log.WithField("CertFile", config.Config.Server.TLS.CertFile).
WithField("KeyFile", config.Config.Server.TLS.KeyFile).
Debug("Using TLS")
err = server.ListenAndServeTLS(config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile)
err = server.ServeTLS(l, config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile)
} else {
err = server.ListenAndServe()
err = server.Serve(l)
}
if err != nil {
if errors.Is(err, http.ErrServerClosed) {
log.WithError(err).Info("Server closed")
} else {
log.WithError(err).Fatal("Error during listening and serving")
}
if errors.Is(err, http.ErrServerClosed) {
log.WithError(err).WithField("listener", l).Info("Server closed")
} else {
log.WithError(err).WithField("listener", l).Error("Error during listening and serving")
}
}

View File

@ -5,6 +5,10 @@ server:
address: 127.0.0.1
# Port on which the webserver listens
port: 7200
# When using Systemd socket activation, Poseidon tries to connect to an existing systemd socket instead of creating its own.
# This is useful for zero downtime deployments as the systemd sockets hold up the connections while Poseidon is restarting.
# Iff systemdsocketactivation, the configured address and port will not be used, instead the provided systemd socket will be.
systemdsocketactivation: false
# If set, this token is required in the `Poseidon-Token` header for each route except /health
# token: SECRET
# Configuration of TLS between the web client and Poseidon.

3
go.mod
View File

@ -3,6 +3,7 @@ module github.com/openHPI/poseidon
go 1.21.1
require (
github.com/coreos/go-systemd/v22 v22.5.0
github.com/getsentry/sentry-go v0.26.0
github.com/google/uuid v1.5.0
github.com/gorilla/mux v1.8.1
@ -11,6 +12,7 @@ require (
github.com/hashicorp/nomad/api v0.0.0-20230922145839-20eadc7b296c
github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/mitchellh/mapstructure v1.5.0
github.com/shirou/gopsutil/v3 v3.23.9
github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.8.4
golang.org/x/sys v0.16.0
@ -85,7 +87,6 @@ require (
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/ryanuber/go-glob v1.0.0 // indirect
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
github.com/shirou/gopsutil/v3 v3.23.9 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/stretchr/objx v0.5.1 // indirect
github.com/tklauser/go-sysconf v0.3.12 // indirect

3
go.sum
View File

@ -46,6 +46,8 @@ github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4r
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
@ -74,6 +76,7 @@ github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI=
github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY=

View File

@ -21,9 +21,10 @@ import (
var (
Config = &configuration{
Server: server{
Address: "127.0.0.1",
Port: 7200,
Token: "",
Address: "127.0.0.1",
Port: 7200,
SystemdSocketActivation: false,
Token: "",
TLS: TLS{
Active: false,
CertFile: "",
@ -90,13 +91,14 @@ type alert struct {
// server configures the Poseidon webserver.
type server struct {
Address string
Port int
Token string
TLS TLS
InteractiveStderr bool
TemplateJobFile string
Alert alert
Address string
Port int
SystemdSocketActivation bool
Token string
TLS TLS
InteractiveStderr bool
TemplateJobFile string
Alert alert
}
// URL returns the URL of the Poseidon webserver.

View File

@ -1,7 +1,6 @@
package recovery
import (
"context"
"encoding/json"
"flag"
nomadApi "github.com/hashicorp/nomad/api"
@ -29,7 +28,6 @@ import (
var (
log = logging.GetLogger("e2e-recovery")
testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests")
poseidonBinary = flag.String("poseidonPath", "", "The path to the Poseidon binary")
nomadClient *nomadApi.Client
nomadNamespace string
)
@ -42,8 +40,7 @@ const (
type E2ERecoveryTestSuite struct {
suite.Suite
runnerID string
poseidonCancel context.CancelFunc
runnerID string
}
// Overwrite TestMain for custom setup.
@ -51,10 +48,6 @@ func TestMain(m *testing.M) {
if err := config.InitConfig(); err != nil {
log.WithError(err).Fatal("Could not initialize configuration")
}
if *poseidonBinary == "" {
log.Fatal("You must specify the -path to the Poseidon binary!")
}
if *testDockerImage == "" {
log.Fatal("You must specify the -dockerImage flag!")
}
@ -77,20 +70,12 @@ func TestMain(m *testing.M) {
func TestE2ERecoveryTests(t *testing.T) {
testSuite := new(E2ERecoveryTestSuite)
ctx, cancelPoseidon := context.WithCancel(context.Background())
testSuite.poseidonCancel = cancelPoseidon
startPoseidon(ctx, cancelPoseidon)
waitForPoseidon()
e2e.CreateDefaultEnvironment(PrewarmingPoolSize, *testDockerImage)
e2e.WaitForDefaultEnvironment()
suite.Run(t, testSuite)
TearDown()
testSuite.poseidonCancel()
<-time.After(tests.ShortTimeout)
}
func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() {
@ -99,6 +84,7 @@ func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() {
}
func (s *E2ERecoveryTestSuite) TestInactivityTimer_Expired() {
waitForPoseidon() // The timeout begins only when the runner is recovered.
<-time.After(InactivityTimeout * time.Second)
_, err := e2e.ProvideWebSocketURL(s.runnerID, &dto.ExecutionRequest{Command: "true"})
s.Error(err)

View File

@ -1,15 +1,13 @@
package recovery
import (
"context"
"github.com/openHPI/poseidon/internal/api"
"github.com/openHPI/poseidon/pkg/dto"
"github.com/openHPI/poseidon/tests"
"github.com/openHPI/poseidon/tests/e2e"
"github.com/openHPI/poseidon/tests/helpers"
"github.com/shirou/gopsutil/v3/process"
"net/http"
"os"
"os/exec"
"time"
)
@ -27,12 +25,8 @@ func (s *E2ERecoveryTestSuite) SetupTest() {
}
<-time.After(tests.ShortTimeout)
s.poseidonCancel()
killPoseidon()
<-time.After(tests.ShortTimeout)
ctx, cancelPoseidon := context.WithCancel(context.Background())
s.poseidonCancel = cancelPoseidon
startPoseidon(ctx, cancelPoseidon)
waitForPoseidon()
}
func TearDown() {
@ -43,16 +37,6 @@ func TearDown() {
}
}
func startPoseidon(ctx context.Context, cancelPoseidon context.CancelFunc) {
poseidon := exec.CommandContext(ctx, *poseidonBinary) //nolint:gosec // We accept that another binary can be executed.
poseidon.Stdout = os.Stdout
poseidon.Stderr = os.Stderr
if err := poseidon.Start(); err != nil {
cancelPoseidon()
log.WithError(err).Fatal("Failed to start Poseidon")
}
}
func waitForPoseidon() {
done := false
for !done {
@ -61,3 +45,24 @@ func waitForPoseidon() {
done = err == nil && resp.StatusCode == http.StatusNoContent
}
}
func killPoseidon() {
processes, err := process.Processes()
if err != nil {
log.WithError(err).Error("Error listing processes")
}
for _, p := range processes {
n, err := p.Name()
if err != nil {
continue
}
if n == "poseidon" {
err = p.Kill()
if err != nil {
log.WithError(err).Error("Error killing Poseidon")
} else {
log.Info("Killed Poseidon")
}
}
}
}