Configure Systemd Watchdog

The watchdog monitors the reachability of Poseidon and automatically restarts Poseidon if required.
This commit is contained in:
Maximilian Paß
2023-12-05 20:28:25 +01:00
committed by Sebastian Serth
parent 2d34854450
commit b48c7fe8b6
6 changed files with 122 additions and 12 deletions

View File

@ -14,6 +14,9 @@ import (
"github.com/stretchr/testify/suite"
"net/http"
"os"
"os/exec"
"strconv"
"strings"
"testing"
"time"
)
@ -120,3 +123,19 @@ func (s *E2ERecoveryTestSuite) TestEnvironmentStatistics() {
s.Equal(uint(PrewarmingPoolSize), environmentStatistics.IdleRunners)
s.Equal(uint(1), environmentStatistics.UsedRunners)
}
// TestWatchdogNotifications waits six seconds and then asserts that the restart
// counter systemd reports for poseidon.service equals PoseidonRestartCount.
// A higher value would mean systemd restarted Poseidon on its own, i.e. the
// watchdog did not receive Poseidon's notification in time.
func (s *E2ERecoveryTestSuite) TestWatchdogNotifications() {
	// Sleep one second longer than the watchdog interval.
	// NOTE(review): the 5 is assumed to mirror `WatchdogSec` of the unit file - confirm.
	time.Sleep((5 + 1) * time.Second)

	// Ask the user-level systemd instance how often it has restarted the service.
	systemctl := exec.Command("/usr/bin/systemctl", "--user", "show", "poseidon.service", "-p", "NRestarts")
	s.Require().NoError(systemctl.Err)

	output, err := systemctl.Output()
	s.Require().NoError(err)

	// The command prints `NRestarts=<n>` followed by a newline; keep only the number.
	value := strings.ReplaceAll(string(output), "NRestarts=", "")
	value = strings.Trim(value, "\n")

	restarts, err := strconv.Atoi(value)
	s.Require().NoError(err)

	// A missed watchdog notification would show up as one restart more than expected.
	s.Equal(PoseidonRestartCount, restarts)
}

View File

@ -46,6 +46,8 @@ func waitForPoseidon() {
}
}
// PoseidonRestartCount tracks how many times killPoseidon has successfully
// killed Poseidon. It starts at zero and is compared against the restart
// counter reported by systemd.
var PoseidonRestartCount int
func killPoseidon() {
processes, err := process.Processes()
if err != nil {
@ -62,6 +64,7 @@ func killPoseidon() {
log.WithError(err).Error("Error killing Poseidon")
} else {
log.Info("Killed Poseidon")
PoseidonRestartCount++
}
}
}