Block Webserver during first Nomad recovery.
No requests are accepted while Poseidon is recovering Nomad environments and runners.
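As a rough, self-contained sketch of how such request blocking can be implemented in Go (the flag and middleware names below are invented for illustration; this is not Poseidon's actual webserver code), a handler can be wrapped so that every request is answered with 503 Service Unavailable until the recovery routine clears a flag:

package main

import (
	"net/http"
	"sync/atomic"
	"time"
)

// recovering is 1 while the (hypothetical) Nomad recovery is still running.
var recovering atomic.Int32

// blockDuringRecovery rejects every request with 503 until recovery has finished.
func blockDuringRecovery(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if recovering.Load() == 1 {
			http.Error(w, "recovering Nomad environments and runners", http.StatusServiceUnavailable)
			return
		}
		next.ServeHTTP(w, r)
	})
}

func main() {
	recovering.Store(1)
	go func() {
		time.Sleep(2 * time.Second) // placeholder for the actual recovery work
		recovering.Store(0)
	}()

	mux := http.NewServeMux()
	mux.HandleFunc("/health", func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	_ = http.ListenAndServe(":8080", blockDuringRecovery(mux))
}
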
@@ -84,11 +84,27 @@ func (m *NomadRunnerManager) Return(r Runner) error {
 	return err
 }
 
-// SynchronizeRunners loads all runners and keeps them synchronized (without a retry mechanism).
-func (m *NomadRunnerManager) SynchronizeRunners(ctx context.Context) error {
+// Load recovers all runners for all existing environments.
+func (m *NomadRunnerManager) Load() {
 	log.Info("Loading runners")
-	m.load()
+	newUsedRunners := storage.NewLocalStorage[Runner]()
+	for _, environment := range m.ListEnvironments() {
+		usedRunners, err := m.loadEnvironment(environment)
+		if err != nil {
+			log.WithError(err).WithField(dto.KeyEnvironmentID, environment.ID().ToString()).
+				Warn("Failed loading environment. Skipping...")
+			continue
+		}
+		for _, r := range usedRunners.List() {
+			newUsedRunners.Add(r.ID(), r)
+		}
+	}
+
+	m.updateUsedRunners(newUsedRunners, true)
+}
 
+// SynchronizeRunners connect once (without retry) to Nomad to receive status updates regarding runners.
+func (m *NomadRunnerManager) SynchronizeRunners(ctx context.Context) error {
 	// Watch for changes regarding the existing or new runners.
 	log.Info("Watching Event Stream")
 	err := m.apiClient.WatchEventStream(ctx,
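After this hunk, Load performs the one-time recovery and the slimmed-down SynchronizeRunners only subscribes to Nomad's event stream; neither retries on its own, as their comments state. The hunk does not show how a caller sequences the two, so the following is only a hedged sketch of one plausible ordering (the unblockServer callback and the retry loop are assumptions, not Poseidon's actual startup code; it relies on the package's existing context, time, and log imports):

// startNomadRecovery is a hypothetical caller of the two methods above.
func startNomadRecovery(ctx context.Context, runnerManager *NomadRunnerManager, unblockServer func()) {
	// 1. Recover all environments and runners once, while requests are still blocked.
	runnerManager.Load()

	// 2. Let the webserver accept requests again (assumed callback).
	unblockServer()

	// 3. Keep following Nomad's allocation events. SynchronizeRunners connects only once,
	//    so reconnecting on error is left to this (illustrative) loop.
	go func() {
		for ctx.Err() == nil {
			if err := runnerManager.SynchronizeRunners(ctx); err != nil {
				log.WithError(err).Warn("event stream closed, reconnecting")
			}
			time.Sleep(time.Second)
		}
	}()
}
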
@@ -167,24 +183,6 @@ func (m *NomadRunnerManager) checkPrewarmingPoolAlert(environment ExecutionEnvir
 	}
 }
 
-// Load recovers all runners for all existing environments.
-func (m *NomadRunnerManager) load() {
-	newUsedRunners := storage.NewLocalStorage[Runner]()
-	for _, environment := range m.ListEnvironments() {
-		usedRunners, err := m.loadEnvironment(environment)
-		if err != nil {
-			log.WithError(err).WithField(dto.KeyEnvironmentID, environment.ID().ToString()).
-				Warn("Failed loading environment. Skipping...")
-			continue
-		}
-		for _, r := range usedRunners.List() {
-			newUsedRunners.Add(r.ID(), r)
-		}
-	}
-
-	m.updateUsedRunners(newUsedRunners, true)
-}
-
 func (m *NomadRunnerManager) loadEnvironment(environment ExecutionEnvironment) (used storage.Storage[Runner], err error) {
 	used = storage.NewLocalStorage[Runner]()
 	runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID())
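loadEnvironment appears above only as context; its body is cut off by the hunk. Conceptually, the per-environment recovery asks the Nomad API for the environment's runner jobs and hands the runners that are currently in use back to the manager. The sketch below is purely illustrative: runnerFromJob is an invented helper, and environment.AddRunner is assumed to return an idle runner to the prewarming pool; the real implementation differs.

// Illustrative sketch only; helper names are invented and the real loadEnvironment differs.
func (m *NomadRunnerManager) loadEnvironmentSketch(environment ExecutionEnvironment) (storage.Storage[Runner], error) {
	used := storage.NewLocalStorage[Runner]()
	runnerJobs, err := m.apiClient.LoadRunnerJobs(environment.ID())
	if err != nil {
		return nil, fmt.Errorf("loading runner jobs failed: %w", err)
	}
	for _, job := range runnerJobs {
		r, inUse := runnerFromJob(job) // hypothetical: rebuild a Runner from its Nomad job
		if inUse {
			used.Add(r.ID(), r) // used runners are returned to the manager
		} else {
			environment.AddRunner(r) // idle runners refill the prewarming pool (assumed)
		}
	}
	return used, nil
}
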
@@ -265,7 +265,7 @@ func (s *ManagerTestSuite) TestUpdateRunnersLogsErrorFromWatchAllocation() {
 		log.WithError(err).Error("failed to synchronize runners")
 	}
 
-	s.Require().Equal(3, len(hook.Entries))
+	s.Require().Equal(2, len(hook.Entries))
 	s.Equal(logrus.ErrorLevel, hook.LastEntry().Level)
 	err, ok := hook.LastEntry().Data[logrus.ErrorKey].(error)
 	s.Require().True(ok)
@@ -531,7 +531,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
 		s.ExpectedGoroutingIncrease++ // We dont care about destroying the created runner.
 		call.Return([]*nomadApi.Job{job}, nil)
 
-		runnerManager.load()
+		runnerManager.Load()
 		environmentMock.AssertExpectations(s.T())
 	})
 
@@ -548,7 +548,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
 		call.Return([]*nomadApi.Job{job}, nil)
 
 		s.Require().Zero(runnerManager.usedRunners.Length())
-		runnerManager.load()
+		runnerManager.Load()
 		_, ok := runnerManager.usedRunners.Get(tests.DefaultRunnerID)
 		s.True(ok)
 	})
@@ -570,7 +570,7 @@ func (s *MainTestSuite) TestNomadRunnerManager_Load() {
 		call.Return([]*nomadApi.Job{job}, nil)
 
 		s.Require().Zero(runnerManager.usedRunners.Length())
-		runnerManager.load()
+		runnerManager.Load()
 		s.Require().NotZero(runnerManager.usedRunners.Length())
 
 		<-time.After(time.Duration(timeout*2) * time.Second)