Block Webserver during first Nomad recovery.

No requests are accepted while Poseidon is recovering Nomad environments and runners.
This commit is contained in:
Maximilian Paß
2023-12-04 13:31:56 +01:00
parent 0e00957adb
commit eaa022282c
4 changed files with 69 additions and 27 deletions

View File

@ -84,11 +84,27 @@ func (m *NomadRunnerManager) Return(r Runner) error {
return err
}
// SynchronizeRunners loads all runners and keeps them synchronized (without a retry mechanism).
func (m *NomadRunnerManager) SynchronizeRunners(ctx context.Context) error {
// Load recovers all runners for all existing environments.
//
// For every environment returned by ListEnvironments it calls loadEnvironment,
// copies the returned used runners into one fresh local storage, and finally
// passes that storage to updateUsedRunners. An environment that fails to load
// is logged at warning level and skipped, so the remaining environments are
// still processed. Load itself returns nothing; failures are only logged.
func (m *NomadRunnerManager) Load() {
log.Info("Loading runners")
// NOTE(review): this text is a diff rendered without +/- markers. The call
// below is presumably the removed line of the previous implementation (the
// same commit removes the private load method) — confirm against the repository.
m.load()
// Collect the used runners of all environments before updating the manager.
newUsedRunners := storage.NewLocalStorage[Runner]()
for _, environment := range m.ListEnvironments() {
usedRunners, err := m.loadEnvironment(environment)
if err != nil {
// Best effort: report the failing environment by its ID and continue.
log.WithError(err).WithField(dto.KeyEnvironmentID, environment.ID().ToString()).
Warn("Failed loading environment. Skipping...")
continue
}
// Merge this environment's used runners, keyed by runner ID.
for _, r := range usedRunners.List() {
newUsedRunners.Add(r.ID(), r)
}
}
// The meaning of the second argument is defined by updateUsedRunners,
// which is not visible in this excerpt.
m.updateUsedRunners(newUsedRunners, true)
}
// SynchronizeRunners connects once (without retry) to Nomad to receive status updates regarding runners.
func (m *NomadRunnerManager) SynchronizeRunners(ctx context.Context) error {
// Watch for changes regarding the existing or new runners.
log.Info("Watching Event Stream")
err := m.apiClient.WatchEventStream(ctx,
@ -167,24 +183,6 @@ func (m *NomadRunnerManager) checkPrewarmingPoolAlert(environment ExecutionEnvir
}
}
// load recovers all runners for all existing environments.
//
// For every environment returned by ListEnvironments it calls loadEnvironment,
// copies the returned used runners into one fresh local storage, and finally
// passes that storage to updateUsedRunners. An environment that fails to load
// is logged at warning level and skipped.
//
// NOTE(review): the hunk header (24 lines before, 6 after) suggests this is the
// removed side of the diff, superseded by the exported Load — confirm against
// the repository.
func (m *NomadRunnerManager) load() {
// Collect the used runners of all environments before updating the manager.
newUsedRunners := storage.NewLocalStorage[Runner]()
for _, environment := range m.ListEnvironments() {
usedRunners, err := m.loadEnvironment(environment)
if err != nil {
// Best effort: report the failing environment by its ID and continue.
log.WithError(err).WithField(dto.KeyEnvironmentID, environment.ID().ToString()).
Warn("Failed loading environment. Skipping...")
continue
}
// Merge this environment's used runners, keyed by runner ID.
for _, r := range usedRunners.List() {
newUsedRunners.Add(r.ID(), r)
}
}
// The meaning of the second argument is defined by updateUsedRunners,
// which is not visible in this excerpt.
m.updateUsedRunners(newUsedRunners, true)
}
func (m *NomadRunnerManager) loadEnvironment(environment ExecutionEnvironment) (used storage.Storage[Runner], err error) {
used = storage.NewLocalStorage[Runner]()
runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID())

View File

@ -265,7 +265,7 @@ func (s *ManagerTestSuite) TestUpdateRunnersLogsErrorFromWatchAllocation() {
log.WithError(err).Error("failed to synchronize runners")
}
s.Require().Equal(3, len(hook.Entries))
s.Require().Equal(2, len(hook.Entries))
s.Equal(logrus.ErrorLevel, hook.LastEntry().Level)
err, ok := hook.LastEntry().Data[logrus.ErrorKey].(error)
s.Require().True(ok)
@ -531,7 +531,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
s.ExpectedGoroutingIncrease++ // We dont care about destroying the created runner.
call.Return([]*nomadApi.Job{job}, nil)
runnerManager.load()
runnerManager.Load()
environmentMock.AssertExpectations(s.T())
})
@ -548,7 +548,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
call.Return([]*nomadApi.Job{job}, nil)
s.Require().Zero(runnerManager.usedRunners.Length())
runnerManager.load()
runnerManager.Load()
_, ok := runnerManager.usedRunners.Get(tests.DefaultRunnerID)
s.True(ok)
})
@ -570,7 +570,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
call.Return([]*nomadApi.Job{job}, nil)
s.Require().Zero(runnerManager.usedRunners.Length())
runnerManager.load()
runnerManager.Load()
s.Require().NotZero(runnerManager.usedRunners.Length())
<-time.After(time.Duration(timeout*2) * time.Second)