Configure Systemd Socket Activation
as new way for Poseidon to accept connections. This should reduce our issues caused by deployments.
This commit is contained in:
13
.github/workflows/ci.yml
vendored
13
.github/workflows/ci.yml
vendored
@ -208,9 +208,20 @@ jobs:
|
||||
./poseidon | tee poseidon.log &
|
||||
until curl -s --fail http://localhost:7200/api/v1/health ; do sleep 1; done
|
||||
make e2e-test
|
||||
- name: Run e2e recovery tests
|
||||
- name: Setup Poseidon Socket
|
||||
run: |
|
||||
killall poseidon
|
||||
mkdir -p ~/.config/systemd/user
|
||||
cp ./.github/workflows/resources/poseidon-minimal.socket ~/.config/systemd/user/poseidon.socket
|
||||
cat ./.github/workflows/resources/poseidon-minimal.service | envsubst > ~/.config/systemd/user/poseidon.service
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user start poseidon.socket
|
||||
- name: Print Poseidon Failure logs
|
||||
if: failure()
|
||||
run: journalctl -xen --no-pager
|
||||
- name: Run e2e recovery tests
|
||||
run: |
|
||||
tail -f /var/log/syslog &
|
||||
make e2e-test-recovery
|
||||
if: ${{ success() || failure() }}
|
||||
- name: Convert coverage reports
|
||||
|
12
.github/workflows/resources/poseidon-minimal.service
vendored
Normal file
12
.github/workflows/resources/poseidon-minimal.service
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
# This is a minimal service definition to make use of the systemd socket activation in the e2e tests.
|
||||
# With Systemd socket activation, systemd sets up a listening socket on behalf of a service.
|
||||
# This is useful for zero downtime deployments as the systemd sockets hold up the connections while the service is restarting.
|
||||
|
||||
[Unit]
|
||||
Requires=poseidon.socket
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=${GITHUB_WORKSPACE}
|
||||
ExecStart=${GITHUB_WORKSPACE}/poseidon
|
||||
Restart=always
|
||||
Environment="POSEIDON_SERVER_SYSTEMDSOCKETACTIVATION=TRUE"
|
4
.github/workflows/resources/poseidon-minimal.socket
vendored
Normal file
4
.github/workflows/resources/poseidon-minimal.socket
vendored
Normal file
@ -0,0 +1,4 @@
|
||||
# This is a minimal socket definition to provide a systemd socket for the Poseidon e2e tests.
|
||||
|
||||
[Socket]
|
||||
ListenStream = 7200
|
7
Makefile
7
Makefile
@ -121,6 +121,11 @@ run-with-coverage: build-cover ## Run binary and capture code coverage (during e
|
||||
@mkdir -p $(GOCOVERDIR)
|
||||
@GOCOVERDIR=$(GOCOVERDIR) ./$(PROJECT_NAME)
|
||||
|
||||
## This target uses `systemd-socket-activate` (only Linux) to create a systemd socket and makes it accessible to a new Poseidon execution.
|
||||
.PHONY: run-with-socket
|
||||
run-with-socket: build
|
||||
@systemd-socket-activate -l 7200 ./$(PROJECT_NAME)
|
||||
|
||||
.PHONY: convert-run-coverage
|
||||
convert-run-coverage: ## Convert coverage data (created by `run-with-coverage`) to legacy text format
|
||||
@go tool covdata textfmt -i $(GOCOVERDIR) -o $(GOCOVERDIR)/coverage_run.cov
|
||||
@ -138,7 +143,7 @@ e2e-test: deps ## Run e2e tests
|
||||
|
||||
.PHONY: e2e-test-recovery
|
||||
e2e-test-recovery: deps ## Run recovery e2e tests
|
||||
@go test -count=1 ./tests/recovery -v -args -poseidonPath="../../poseidon" -dockerImage="$(E2E_TEST_DOCKER_IMAGE)"
|
||||
@go test -count=1 ./tests/recovery -v -args -dockerImage="$(E2E_TEST_DOCKER_IMAGE)"
|
||||
|
||||
.PHONY: e2e-docker
|
||||
e2e-docker: docker ## Run e2e tests against the Docker container
|
||||
|
@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/coreos/go-systemd/v22/activation"
|
||||
"github.com/getsentry/sentry-go"
|
||||
sentryhttp "github.com/getsentry/sentry-go/http"
|
||||
"github.com/openHPI/poseidon/internal/api"
|
||||
@ -14,6 +15,7 @@ import (
|
||||
"github.com/openHPI/poseidon/pkg/logging"
|
||||
"github.com/openHPI/poseidon/pkg/monitoring"
|
||||
"golang.org/x/sys/unix"
|
||||
"net"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/signal"
|
||||
@ -21,6 +23,7 @@ import (
|
||||
"runtime/debug"
|
||||
"runtime/pprof"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -156,24 +159,58 @@ func runServer(server *http.Server, cancel context.CancelFunc) {
|
||||
defer cancel()
|
||||
defer shutdownSentry() // shutdownSentry must be executed in the main goroutine.
|
||||
|
||||
log.WithField("address", server.Addr).Info("Starting server")
|
||||
httpListeners := getHTTPListeners(server)
|
||||
serveHTTPListeners(server, httpListeners)
|
||||
}
|
||||
|
||||
func getHTTPListeners(server *http.Server) (httpListeners []net.Listener) {
|
||||
var err error
|
||||
if config.Config.Server.SystemdSocketActivation {
|
||||
httpListeners, err = activation.Listeners()
|
||||
} else {
|
||||
var httpListener net.Listener
|
||||
httpListener, err = net.Listen("tcp", server.Addr)
|
||||
httpListeners = append(httpListeners, httpListener)
|
||||
}
|
||||
if err != nil || httpListeners == nil || len(httpListeners) == 0 {
|
||||
log.WithError(err).
|
||||
WithField("listeners", httpListeners).
|
||||
WithField("systemd_socket", config.Config.Server.SystemdSocketActivation).
|
||||
Fatal("Failed listening to any socket")
|
||||
return nil
|
||||
}
|
||||
return httpListeners
|
||||
}
|
||||
|
||||
func serveHTTPListeners(server *http.Server, httpListeners []net.Listener) {
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(httpListeners))
|
||||
for _, l := range httpListeners {
|
||||
go func(listener net.Listener) {
|
||||
defer wg.Done()
|
||||
log.WithField("address", listener.Addr()).Info("Serving Listener")
|
||||
serveHTTPListener(server, listener)
|
||||
}(l)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func serveHTTPListener(server *http.Server, l net.Listener) {
|
||||
var err error
|
||||
if config.Config.Server.TLS.Active {
|
||||
server.TLSConfig = config.TLSConfig
|
||||
log.
|
||||
WithField("CertFile", config.Config.Server.TLS.CertFile).
|
||||
log.WithField("CertFile", config.Config.Server.TLS.CertFile).
|
||||
WithField("KeyFile", config.Config.Server.TLS.KeyFile).
|
||||
Debug("Using TLS")
|
||||
err = server.ListenAndServeTLS(config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile)
|
||||
err = server.ServeTLS(l, config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile)
|
||||
} else {
|
||||
err = server.ListenAndServe()
|
||||
err = server.Serve(l)
|
||||
}
|
||||
if err != nil {
|
||||
if errors.Is(err, http.ErrServerClosed) {
|
||||
log.WithError(err).Info("Server closed")
|
||||
} else {
|
||||
log.WithError(err).Fatal("Error during listening and serving")
|
||||
}
|
||||
|
||||
if errors.Is(err, http.ErrServerClosed) {
|
||||
log.WithError(err).WithField("listener", l).Info("Server closed")
|
||||
} else {
|
||||
log.WithError(err).WithField("listener", l).Error("Error during listening and serving")
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,10 @@ server:
|
||||
address: 127.0.0.1
|
||||
# Port on which the webserver listens
|
||||
port: 7200
|
||||
# When using Systemd socket activation, Poseidon tries to connect to an existing systemd socket instead of creating its own.
|
||||
# This is useful for zero downtime deployments as the systemd sockets hold up the connections while Poseidon is restarting.
|
||||
# Iff systemdsocketactivation, the configured address and port will not be used, instead the provided systemd socket will be.
|
||||
systemdsocketactivation: false
|
||||
# If set, this token is required in the `Poseidon-Token` header for each route except /health
|
||||
# token: SECRET
|
||||
# Configuration of TLS between the web client and Poseidon.
|
||||
|
3
go.mod
3
go.mod
@ -3,6 +3,7 @@ module github.com/openHPI/poseidon
|
||||
go 1.21.1
|
||||
|
||||
require (
|
||||
github.com/coreos/go-systemd/v22 v22.5.0
|
||||
github.com/getsentry/sentry-go v0.26.0
|
||||
github.com/google/uuid v1.5.0
|
||||
github.com/gorilla/mux v1.8.1
|
||||
@ -11,6 +12,7 @@ require (
|
||||
github.com/hashicorp/nomad/api v0.0.0-20230922145839-20eadc7b296c
|
||||
github.com/influxdata/influxdb-client-go/v2 v2.13.0
|
||||
github.com/mitchellh/mapstructure v1.5.0
|
||||
github.com/shirou/gopsutil/v3 v3.23.9
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
github.com/stretchr/testify v1.8.4
|
||||
golang.org/x/sys v0.16.0
|
||||
@ -85,7 +87,6 @@ require (
|
||||
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
|
||||
github.com/ryanuber/go-glob v1.0.0 // indirect
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
|
||||
github.com/shirou/gopsutil/v3 v3.23.9 // indirect
|
||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||
github.com/stretchr/objx v0.5.1 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.12 // indirect
|
||||
|
3
go.sum
3
go.sum
@ -46,6 +46,8 @@ github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4r
|
||||
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag=
|
||||
github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs=
|
||||
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
|
||||
@ -74,6 +76,7 @@ github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
|
||||
github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
|
||||
github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI=
|
||||
github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY=
|
||||
|
@ -21,9 +21,10 @@ import (
|
||||
var (
|
||||
Config = &configuration{
|
||||
Server: server{
|
||||
Address: "127.0.0.1",
|
||||
Port: 7200,
|
||||
Token: "",
|
||||
Address: "127.0.0.1",
|
||||
Port: 7200,
|
||||
SystemdSocketActivation: false,
|
||||
Token: "",
|
||||
TLS: TLS{
|
||||
Active: false,
|
||||
CertFile: "",
|
||||
@ -90,13 +91,14 @@ type alert struct {
|
||||
|
||||
// server configures the Poseidon webserver.
|
||||
type server struct {
|
||||
Address string
|
||||
Port int
|
||||
Token string
|
||||
TLS TLS
|
||||
InteractiveStderr bool
|
||||
TemplateJobFile string
|
||||
Alert alert
|
||||
Address string
|
||||
Port int
|
||||
SystemdSocketActivation bool
|
||||
Token string
|
||||
TLS TLS
|
||||
InteractiveStderr bool
|
||||
TemplateJobFile string
|
||||
Alert alert
|
||||
}
|
||||
|
||||
// URL returns the URL of the Poseidon webserver.
|
||||
|
@ -1,7 +1,6 @@
|
||||
package recovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"flag"
|
||||
nomadApi "github.com/hashicorp/nomad/api"
|
||||
@ -29,7 +28,6 @@ import (
|
||||
var (
|
||||
log = logging.GetLogger("e2e-recovery")
|
||||
testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests")
|
||||
poseidonBinary = flag.String("poseidonPath", "", "The path to the Poseidon binary")
|
||||
nomadClient *nomadApi.Client
|
||||
nomadNamespace string
|
||||
)
|
||||
@ -42,8 +40,7 @@ const (
|
||||
|
||||
type E2ERecoveryTestSuite struct {
|
||||
suite.Suite
|
||||
runnerID string
|
||||
poseidonCancel context.CancelFunc
|
||||
runnerID string
|
||||
}
|
||||
|
||||
// Overwrite TestMain for custom setup.
|
||||
@ -51,10 +48,6 @@ func TestMain(m *testing.M) {
|
||||
if err := config.InitConfig(); err != nil {
|
||||
log.WithError(err).Fatal("Could not initialize configuration")
|
||||
}
|
||||
|
||||
if *poseidonBinary == "" {
|
||||
log.Fatal("You must specify the -path to the Poseidon binary!")
|
||||
}
|
||||
if *testDockerImage == "" {
|
||||
log.Fatal("You must specify the -dockerImage flag!")
|
||||
}
|
||||
@ -77,20 +70,12 @@ func TestMain(m *testing.M) {
|
||||
func TestE2ERecoveryTests(t *testing.T) {
|
||||
testSuite := new(E2ERecoveryTestSuite)
|
||||
|
||||
ctx, cancelPoseidon := context.WithCancel(context.Background())
|
||||
testSuite.poseidonCancel = cancelPoseidon
|
||||
|
||||
startPoseidon(ctx, cancelPoseidon)
|
||||
waitForPoseidon()
|
||||
|
||||
e2e.CreateDefaultEnvironment(PrewarmingPoolSize, *testDockerImage)
|
||||
e2e.WaitForDefaultEnvironment()
|
||||
|
||||
suite.Run(t, testSuite)
|
||||
|
||||
TearDown()
|
||||
testSuite.poseidonCancel()
|
||||
<-time.After(tests.ShortTimeout)
|
||||
}
|
||||
|
||||
func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() {
|
||||
@ -99,6 +84,7 @@ func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() {
|
||||
}
|
||||
|
||||
func (s *E2ERecoveryTestSuite) TestInactivityTimer_Expired() {
|
||||
waitForPoseidon() // The timeout begins only when the runner is recovered.
|
||||
<-time.After(InactivityTimeout * time.Second)
|
||||
_, err := e2e.ProvideWebSocketURL(s.runnerID, &dto.ExecutionRequest{Command: "true"})
|
||||
s.Error(err)
|
||||
|
@ -1,15 +1,13 @@
|
||||
package recovery
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/openHPI/poseidon/internal/api"
|
||||
"github.com/openHPI/poseidon/pkg/dto"
|
||||
"github.com/openHPI/poseidon/tests"
|
||||
"github.com/openHPI/poseidon/tests/e2e"
|
||||
"github.com/openHPI/poseidon/tests/helpers"
|
||||
"github.com/shirou/gopsutil/v3/process"
|
||||
"net/http"
|
||||
"os"
|
||||
"os/exec"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -27,12 +25,8 @@ func (s *E2ERecoveryTestSuite) SetupTest() {
|
||||
}
|
||||
|
||||
<-time.After(tests.ShortTimeout)
|
||||
s.poseidonCancel()
|
||||
killPoseidon()
|
||||
<-time.After(tests.ShortTimeout)
|
||||
ctx, cancelPoseidon := context.WithCancel(context.Background())
|
||||
s.poseidonCancel = cancelPoseidon
|
||||
startPoseidon(ctx, cancelPoseidon)
|
||||
waitForPoseidon()
|
||||
}
|
||||
|
||||
func TearDown() {
|
||||
@ -43,16 +37,6 @@ func TearDown() {
|
||||
}
|
||||
}
|
||||
|
||||
func startPoseidon(ctx context.Context, cancelPoseidon context.CancelFunc) {
|
||||
poseidon := exec.CommandContext(ctx, *poseidonBinary) //nolint:gosec // We accept that another binary can be executed.
|
||||
poseidon.Stdout = os.Stdout
|
||||
poseidon.Stderr = os.Stderr
|
||||
if err := poseidon.Start(); err != nil {
|
||||
cancelPoseidon()
|
||||
log.WithError(err).Fatal("Failed to start Poseidon")
|
||||
}
|
||||
}
|
||||
|
||||
func waitForPoseidon() {
|
||||
done := false
|
||||
for !done {
|
||||
@ -61,3 +45,24 @@ func waitForPoseidon() {
|
||||
done = err == nil && resp.StatusCode == http.StatusNoContent
|
||||
}
|
||||
}
|
||||
|
||||
func killPoseidon() {
|
||||
processes, err := process.Processes()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error listing processes")
|
||||
}
|
||||
for _, p := range processes {
|
||||
n, err := p.Name()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if n == "poseidon" {
|
||||
err = p.Kill()
|
||||
if err != nil {
|
||||
log.WithError(err).Error("Error killing Poseidon")
|
||||
} else {
|
||||
log.Info("Killed Poseidon")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user