From eaddc659891847cfb1951f83be3a0b2d1523367d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20Pa=C3=9F?= <22845248+mpass99@users.noreply.github.com> Date: Sat, 2 Dec 2023 16:56:43 +0100 Subject: [PATCH] Configure Systemd Socket Activation as new way for Poseidon to accept connections. This should reduce our issues caused by deployments. --- .github/workflows/ci.yml | 13 +++- .../resources/poseidon-minimal.service | 12 ++++ .../resources/poseidon-minimal.socket | 4 ++ Makefile | 7 ++- cmd/poseidon/main.go | 59 +++++++++++++++---- configuration.example.yaml | 4 ++ go.mod | 3 +- go.sum | 3 + internal/config/config.go | 22 +++---- tests/recovery/e2e_recovery_test.go | 18 +----- tests/recovery/setup_test.go | 41 +++++++------ 11 files changed, 128 insertions(+), 58 deletions(-) create mode 100644 .github/workflows/resources/poseidon-minimal.service create mode 100644 .github/workflows/resources/poseidon-minimal.socket diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8b104d9..55bd618 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -208,9 +208,20 @@ jobs: ./poseidon | tee poseidon.log & until curl -s --fail http://localhost:7200/api/v1/health ; do sleep 1; done make e2e-test - - name: Run e2e recovery tests + - name: Setup Poseidon Socket run: | killall poseidon + mkdir -p ~/.config/systemd/user + cp ./.github/workflows/resources/poseidon-minimal.socket ~/.config/systemd/user/poseidon.socket + cat ./.github/workflows/resources/poseidon-minimal.service | envsubst > ~/.config/systemd/user/poseidon.service + systemctl --user daemon-reload + systemctl --user start poseidon.socket + - name: Print Poseidon Failure logs + if: failure() + run: journalctl -xen --no-pager + - name: Run e2e recovery tests + run: | + tail -f /var/log/syslog & make e2e-test-recovery if: ${{ success() || failure() }} - name: Convert coverage reports diff --git a/.github/workflows/resources/poseidon-minimal.service b/.github/workflows/resources/poseidon-minimal.service new file mode 100644 index 0000000..559178e --- /dev/null +++ b/.github/workflows/resources/poseidon-minimal.service @@ -0,0 +1,12 @@ +# This is a minimal service definition to make use of the systemd socket activation in the e2e tests. +# With Systemd socket activation, systemd sets up a listening socket on behalf of a service. +# This is useful for zero downtime deployments as the systemd sockets hold up the connections while the service is restarting. + +[Unit] +Requires=poseidon.socket + +[Service] +WorkingDirectory=${GITHUB_WORKSPACE} +ExecStart=${GITHUB_WORKSPACE}/poseidon +Restart=always +Environment="POSEIDON_SERVER_SYSTEMDSOCKETACTIVATION=TRUE" diff --git a/.github/workflows/resources/poseidon-minimal.socket b/.github/workflows/resources/poseidon-minimal.socket new file mode 100644 index 0000000..ec7cc5c --- /dev/null +++ b/.github/workflows/resources/poseidon-minimal.socket @@ -0,0 +1,4 @@ +# This is a minimal socket definition to provide a systemd socket for the Poseidon e2e tests. + +[Socket] +ListenStream = 7200 diff --git a/Makefile b/Makefile index 5758997..a6c6f7f 100644 --- a/Makefile +++ b/Makefile @@ -121,6 +121,11 @@ run-with-coverage: build-cover ## Run binary and capture code coverage (during e @mkdir -p $(GOCOVERDIR) @GOCOVERDIR=$(GOCOVERDIR) ./$(PROJECT_NAME) +## This target uses `systemd-socket-activate` (only Linux) to create a systemd socket and makes it accessible to a new Poseidon execution. +.PHONY: run-with-socket +run-with-socket: build + @systemd-socket-activate -l 7200 ./$(PROJECT_NAME) + .PHONY: convert-run-coverage convert-run-coverage: ## Convert coverage data (created by `run-with-coverage`) to legacy text format @go tool covdata textfmt -i $(GOCOVERDIR) -o $(GOCOVERDIR)/coverage_run.cov @@ -138,7 +143,7 @@ e2e-test: deps ## Run e2e tests .PHONY: e2e-test-recovery e2e-test-recovery: deps ## Run recovery e2e tests - @go test -count=1 ./tests/recovery -v -args -poseidonPath="../../poseidon" -dockerImage="$(E2E_TEST_DOCKER_IMAGE)" + @go test -count=1 ./tests/recovery -v -args -dockerImage="$(E2E_TEST_DOCKER_IMAGE)" .PHONY: e2e-docker e2e-docker: docker ## Run e2e tests against the Docker container diff --git a/cmd/poseidon/main.go b/cmd/poseidon/main.go index 88a668f..89273c0 100644 --- a/cmd/poseidon/main.go +++ b/cmd/poseidon/main.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "github.com/coreos/go-systemd/v22/activation" "github.com/getsentry/sentry-go" sentryhttp "github.com/getsentry/sentry-go/http" "github.com/openHPI/poseidon/internal/api" @@ -14,6 +15,7 @@ import ( "github.com/openHPI/poseidon/pkg/logging" "github.com/openHPI/poseidon/pkg/monitoring" "golang.org/x/sys/unix" + "net" "net/http" "os" "os/signal" @@ -21,6 +23,7 @@ import ( "runtime/debug" "runtime/pprof" "strconv" + "sync" "time" ) @@ -156,24 +159,58 @@ func runServer(server *http.Server, cancel context.CancelFunc) { defer cancel() defer shutdownSentry() // shutdownSentry must be executed in the main goroutine. - log.WithField("address", server.Addr).Info("Starting server") + httpListeners := getHTTPListeners(server) + serveHTTPListeners(server, httpListeners) +} + +func getHTTPListeners(server *http.Server) (httpListeners []net.Listener) { + var err error + if config.Config.Server.SystemdSocketActivation { + httpListeners, err = activation.Listeners() + } else { + var httpListener net.Listener + httpListener, err = net.Listen("tcp", server.Addr) + httpListeners = append(httpListeners, httpListener) + } + if err != nil || httpListeners == nil || len(httpListeners) == 0 { + log.WithError(err). + WithField("listeners", httpListeners). + WithField("systemd_socket", config.Config.Server.SystemdSocketActivation). + Fatal("Failed listening to any socket") + return nil + } + return httpListeners +} + +func serveHTTPListeners(server *http.Server, httpListeners []net.Listener) { + var wg sync.WaitGroup + wg.Add(len(httpListeners)) + for _, l := range httpListeners { + go func(listener net.Listener) { + defer wg.Done() + log.WithField("address", listener.Addr()).Info("Serving Listener") + serveHTTPListener(server, listener) + }(l) + } + wg.Wait() +} + +func serveHTTPListener(server *http.Server, l net.Listener) { var err error if config.Config.Server.TLS.Active { server.TLSConfig = config.TLSConfig - log. - WithField("CertFile", config.Config.Server.TLS.CertFile). + log.WithField("CertFile", config.Config.Server.TLS.CertFile). WithField("KeyFile", config.Config.Server.TLS.KeyFile). Debug("Using TLS") - err = server.ListenAndServeTLS(config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile) + err = server.ServeTLS(l, config.Config.Server.TLS.CertFile, config.Config.Server.TLS.KeyFile) } else { - err = server.ListenAndServe() + err = server.Serve(l) } - if err != nil { - if errors.Is(err, http.ErrServerClosed) { - log.WithError(err).Info("Server closed") - } else { - log.WithError(err).Fatal("Error during listening and serving") - } + + if errors.Is(err, http.ErrServerClosed) { + log.WithError(err).WithField("listener", l).Info("Server closed") + } else { + log.WithError(err).WithField("listener", l).Error("Error during listening and serving") } } diff --git a/configuration.example.yaml b/configuration.example.yaml index 44e072b..0692dfc 100644 --- a/configuration.example.yaml +++ b/configuration.example.yaml @@ -5,6 +5,10 @@ server: address: 127.0.0.1 # Port on which the webserver listens port: 7200 + # When using Systemd socket activation, Poseidon tries to connect to an existing systemd socket instead of creating its own. + # This is useful for zero downtime deployments as the systemd sockets hold up the connections while Poseidon is restarting. + # Iff systemdsocketactivation, the configured address and port will not be used, instead the provided systemd socket will be. + systemdsocketactivation: false # If set, this token is required in the `Poseidon-Token` header for each route except /health # token: SECRET # Configuration of TLS between the web client and Poseidon. diff --git a/go.mod b/go.mod index 49e600b..bd9a268 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/openHPI/poseidon go 1.21.1 require ( + github.com/coreos/go-systemd/v22 v22.5.0 github.com/getsentry/sentry-go v0.26.0 github.com/google/uuid v1.5.0 github.com/gorilla/mux v1.8.1 @@ -11,6 +12,7 @@ require ( github.com/hashicorp/nomad/api v0.0.0-20230922145839-20eadc7b296c github.com/influxdata/influxdb-client-go/v2 v2.13.0 github.com/mitchellh/mapstructure v1.5.0 + github.com/shirou/gopsutil/v3 v3.23.9 github.com/sirupsen/logrus v1.9.3 github.com/stretchr/testify v1.8.4 golang.org/x/sys v0.16.0 @@ -85,7 +87,6 @@ require ( github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect github.com/ryanuber/go-glob v1.0.0 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect - github.com/shirou/gopsutil/v3 v3.23.9 // indirect github.com/shoenig/go-m1cpu v0.1.6 // indirect github.com/stretchr/objx v0.5.1 // indirect github.com/tklauser/go-sysconf v0.3.12 // indirect diff --git a/go.sum b/go.sum index 3def607..ae55afa 100644 --- a/go.sum +++ b/go.sum @@ -46,6 +46,8 @@ github.com/cenkalti/backoff/v3 v3.2.2/go.mod h1:cIeZDE3IrqwwJl6VUwCN6trj1oXrTS4r github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp5jckzBHf4XRpQvBOLI+I= +github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= +github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= @@ -74,6 +76,7 @@ github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiU github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68= github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gojuno/minimock/v3 v3.0.4/go.mod h1:HqeqnwV8mAABn3pO5hqF+RE7gjA0jsN8cbbSogoGrzI= github.com/gojuno/minimock/v3 v3.0.6/go.mod h1:v61ZjAKHr+WnEkND63nQPCZ/DTfQgJdvbCi3IuoMblY= diff --git a/internal/config/config.go b/internal/config/config.go index 5a203bc..47482f2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -21,9 +21,10 @@ import ( var ( Config = &configuration{ Server: server{ - Address: "127.0.0.1", - Port: 7200, - Token: "", + Address: "127.0.0.1", + Port: 7200, + SystemdSocketActivation: false, + Token: "", TLS: TLS{ Active: false, CertFile: "", @@ -90,13 +91,14 @@ type alert struct { // server configures the Poseidon webserver. type server struct { - Address string - Port int - Token string - TLS TLS - InteractiveStderr bool - TemplateJobFile string - Alert alert + Address string + Port int + SystemdSocketActivation bool + Token string + TLS TLS + InteractiveStderr bool + TemplateJobFile string + Alert alert } // URL returns the URL of the Poseidon webserver. diff --git a/tests/recovery/e2e_recovery_test.go b/tests/recovery/e2e_recovery_test.go index 2774a7f..a1ec322 100644 --- a/tests/recovery/e2e_recovery_test.go +++ b/tests/recovery/e2e_recovery_test.go @@ -1,7 +1,6 @@ package recovery import ( - "context" "encoding/json" "flag" nomadApi "github.com/hashicorp/nomad/api" @@ -29,7 +28,6 @@ import ( var ( log = logging.GetLogger("e2e-recovery") testDockerImage = flag.String("dockerImage", "", "Docker image to use in E2E tests") - poseidonBinary = flag.String("poseidonPath", "", "The path to the Poseidon binary") nomadClient *nomadApi.Client nomadNamespace string ) @@ -42,8 +40,7 @@ const ( type E2ERecoveryTestSuite struct { suite.Suite - runnerID string - poseidonCancel context.CancelFunc + runnerID string } // Overwrite TestMain for custom setup. @@ -51,10 +48,6 @@ func TestMain(m *testing.M) { if err := config.InitConfig(); err != nil { log.WithError(err).Fatal("Could not initialize configuration") } - - if *poseidonBinary == "" { - log.Fatal("You must specify the -path to the Poseidon binary!") - } if *testDockerImage == "" { log.Fatal("You must specify the -dockerImage flag!") } @@ -77,20 +70,12 @@ func TestMain(m *testing.M) { func TestE2ERecoveryTests(t *testing.T) { testSuite := new(E2ERecoveryTestSuite) - ctx, cancelPoseidon := context.WithCancel(context.Background()) - testSuite.poseidonCancel = cancelPoseidon - - startPoseidon(ctx, cancelPoseidon) - waitForPoseidon() - e2e.CreateDefaultEnvironment(PrewarmingPoolSize, *testDockerImage) e2e.WaitForDefaultEnvironment() suite.Run(t, testSuite) TearDown() - testSuite.poseidonCancel() - <-time.After(tests.ShortTimeout) } func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() { @@ -99,6 +84,7 @@ func (s *E2ERecoveryTestSuite) TestInactivityTimer_Valid() { } func (s *E2ERecoveryTestSuite) TestInactivityTimer_Expired() { + waitForPoseidon() // The timeout begins only when the runner is recovered. <-time.After(InactivityTimeout * time.Second) _, err := e2e.ProvideWebSocketURL(s.runnerID, &dto.ExecutionRequest{Command: "true"}) s.Error(err) diff --git a/tests/recovery/setup_test.go b/tests/recovery/setup_test.go index 8db8de3..64ec3f9 100644 --- a/tests/recovery/setup_test.go +++ b/tests/recovery/setup_test.go @@ -1,15 +1,13 @@ package recovery import ( - "context" "github.com/openHPI/poseidon/internal/api" "github.com/openHPI/poseidon/pkg/dto" "github.com/openHPI/poseidon/tests" "github.com/openHPI/poseidon/tests/e2e" "github.com/openHPI/poseidon/tests/helpers" + "github.com/shirou/gopsutil/v3/process" "net/http" - "os" - "os/exec" "time" ) @@ -27,12 +25,8 @@ func (s *E2ERecoveryTestSuite) SetupTest() { } <-time.After(tests.ShortTimeout) - s.poseidonCancel() + killPoseidon() <-time.After(tests.ShortTimeout) - ctx, cancelPoseidon := context.WithCancel(context.Background()) - s.poseidonCancel = cancelPoseidon - startPoseidon(ctx, cancelPoseidon) - waitForPoseidon() } func TearDown() { @@ -43,16 +37,6 @@ func TearDown() { } } -func startPoseidon(ctx context.Context, cancelPoseidon context.CancelFunc) { - poseidon := exec.CommandContext(ctx, *poseidonBinary) //nolint:gosec // We accept that another binary can be executed. - poseidon.Stdout = os.Stdout - poseidon.Stderr = os.Stderr - if err := poseidon.Start(); err != nil { - cancelPoseidon() - log.WithError(err).Fatal("Failed to start Poseidon") - } -} - func waitForPoseidon() { done := false for !done { @@ -61,3 +45,24 @@ func waitForPoseidon() { done = err == nil && resp.StatusCode == http.StatusNoContent } } + +func killPoseidon() { + processes, err := process.Processes() + if err != nil { + log.WithError(err).Error("Error listing processes") + } + for _, p := range processes { + n, err := p.Name() + if err != nil { + continue + } + if n == "poseidon" { + err = p.Kill() + if err != nil { + log.WithError(err).Error("Error killing Poseidon") + } else { + log.Info("Killed Poseidon") + } + } + } +}