Merge pull request #1079 from openHPI/sync_execution_environments

Sync execution environments
This commit is contained in:
Sebastian Serth
2021-11-09 18:44:35 +01:00
committed by GitHub
11 changed files with 262 additions and 77 deletions

View File

@ -15,9 +15,7 @@ class ExecutionEnvironmentsController < ApplicationController
def create
@execution_environment = ExecutionEnvironment.new(execution_environment_params)
authorize!
create_and_respond(object: @execution_environment) do
sync_to_runner_management
end
create_and_respond(object: @execution_environment)
end
def destroy
@ -135,8 +133,8 @@ class ExecutionEnvironmentsController < ApplicationController
def set_docker_images
@docker_images ||= ExecutionEnvironment.pluck(:docker_image)
@docker_images += Runner.strategy_class.available_images
rescue Runner::Error::InternalServerError => e
flash[:warning] = e.message
rescue Runner::Error => e
flash[:warning] = html_escape e.message
ensure
@docker_images = @docker_images.sort.uniq
end
@ -165,9 +163,7 @@ class ExecutionEnvironmentsController < ApplicationController
end
def update
update_and_respond(object: @execution_environment, params: execution_environment_params) do
sync_to_runner_management
end
update_and_respond(object: @execution_environment, params: execution_environment_params)
end
def sync_all_to_runner_management
@ -175,20 +171,40 @@ class ExecutionEnvironmentsController < ApplicationController
return unless Runner.management_active?
success = ExecutionEnvironment.all.map do |execution_environment|
Runner.strategy_class.sync_environment(execution_environment)
success = []
begin
# Get a list of all existing execution environments and mark them as a potential candidate for removal
environments_to_remove = Runner.strategy_class.environments.pluck(:id)
success << true
rescue Runner::Error => e
Rails.logger.debug { "Runner error while getting all execution environments: #{e.message}" }
environments_to_remove = []
success << false
end
success += ExecutionEnvironment.all.map do |execution_environment|
# Sync all current execution environments and prevent deletion of those just synced
environments_to_remove -= [execution_environment.id]
Runner.strategy_class.sync_environment(execution_environment)
rescue Runner::Error => e
Rails.logger.debug { "Runner error while synchronizing execution environment with id #{execution_environment.id}: #{e.message}" }
false
end
success += environments_to_remove.map do |execution_environment_id|
# Remove execution environments not synced. We temporarily use a record which is not persisted
execution_environment = ExecutionEnvironment.new(id: execution_environment_id)
Runner.strategy_class.remove_environment(execution_environment)
rescue Runner::Error => e
Rails.logger.debug { "Runner error while deleting execution environment with id #{execution_environment.id}: #{e.message}" }
false
end
if success.all?
redirect_to ExecutionEnvironment, notice: t('execution_environments.index.synchronize_all.success')
else
redirect_to ExecutionEnvironment, alert: t('execution_environments.index.synchronize_all.failure')
end
end
def sync_to_runner_management
unless Runner.management_active? && Runner.strategy_class.sync_environment(@execution_environment)
t('execution_environments.form.errors.not_synced_to_runner_management')
end
end
private :sync_to_runner_management
end

View File

@ -19,8 +19,9 @@ class ExecutionEnvironment < ApplicationRecord
scope :with_exercises, -> { where('id IN (SELECT execution_environment_id FROM exercises)') }
before_validation :clean_exposed_ports
validate :valid_test_setup?
validate :working_docker_image?, if: :validate_docker_image?
validates :docker_image, presence: true
validates :memory_limit,
numericality: {greater_than_or_equal_to: MINIMUM_MEMORY_LIMIT, only_integer: true}, presence: true
@ -30,13 +31,13 @@ class ExecutionEnvironment < ApplicationRecord
validates :pool_size, numericality: {only_integer: true}, presence: true
validates :run_command, presence: true
validates :cpu_limit, presence: true, numericality: {greater_than: 0, only_integer: true}
before_validation :clean_exposed_ports
validates :exposed_ports, array: {numericality: {greater_than_or_equal_to: 0, less_than: 65_536, only_integer: true}}
def set_default_values
set_default_values_if_present(permitted_execution_time: 60, pool_size: 0)
end
private :set_default_values
after_destroy :delete_runner_environment
after_save :working_docker_image?, if: :validate_docker_image?
after_rollback :delete_runner_environment, on: :create
after_rollback :sync_runner_environment, on: %i[update destroy]
def to_s
name
@ -58,10 +59,15 @@ class ExecutionEnvironment < ApplicationRecord
exposed_ports.join(', ')
end
private
def set_default_values
set_default_values_if_present(permitted_execution_time: 60, pool_size: 0)
end
def clean_exposed_ports
self.exposed_ports = exposed_ports.uniq.sort
end
private :clean_exposed_ports
def valid_test_setup?
if test_command? ^ testing_framework?
@ -70,21 +76,49 @@ class ExecutionEnvironment < ApplicationRecord
attribute: I18n.t('activerecord.attributes.execution_environment.testing_framework')))
end
end
private :valid_test_setup?
def validate_docker_image?
docker_image.present? && !Rails.env.test?
# We only validate the code execution with the provided image if there is at least one container to test with.
pool_size.positive? && docker_image.present? && !Rails.env.test?
end
private :validate_docker_image?
def working_docker_image?
sync_runner_environment
retries = 0
begin
runner = Runner.for(author, self)
output = runner.execute_command(VALIDATION_COMMAND)
errors.add(:docker_image, "error: #{output[:stderr]}") if output[:stderr].present?
rescue Runner::Error::NotAvailable => e
Rails.logger.info("The Docker image could not be verified: #{e}")
rescue Runner::Error => e
# In case of an Runner::Error, we retry multiple times before giving up.
# The time between each retry increases to allow the runner management to catch up.
if retries < 5 && !Rails.env.test?
retries += 1
sleep retries
retry
elsif errors.exclude?(:docker_image)
errors.add(:docker_image, "error: #{e}")
raise ActiveRecord::RecordInvalid.new(self)
end
end
end
def delete_runner_environment
Runner.strategy_class.remove_environment(self)
rescue Runner::Error => e
unless errors.include?(:docker_image)
errors.add(:docker_image, "error: #{e}")
raise ActiveRecord::RecordInvalid.new(self)
end
end
def sync_runner_environment
previous_saved_environment = self.class.find(id)
Runner.strategy_class.sync_environment(previous_saved_environment)
rescue Runner::Error => e
unless errors.include?(:docker_image)
errors.add(:docker_image, "error: #{e}")
raise ActiveRecord::RecordInvalid.new(self)
end
end
private :working_docker_image?
end

View File

@ -140,16 +140,20 @@ class Runner < ApplicationRecord
rescue Runner::Error::EnvironmentNotFound
# Whenever the environment could not be found by the runner management, we
# try to synchronize it and then forward a more specific error to our callee.
if strategy_class.sync_environment(execution_environment)
begin
strategy_class.sync_environment(execution_environment)
rescue Runner::Error
# An additional error was raised during synchronization
raise Runner::Error::EnvironmentNotFound.new(
"The execution environment with id #{execution_environment.id} was not found by the runner management. "\
'In addition, it could not be synced so that this probably indicates a permanent error.'
)
else
# No error was raised during synchronization
raise Runner::Error::EnvironmentNotFound.new(
"The execution environment with id #{execution_environment.id} was not found yet by the runner management. "\
'It has been successfully synced now so that the next request should be successful.'
)
else
raise Runner::Error::EnvironmentNotFound.new(
"The execution environment with id #{execution_environment.id} was not found by the runner management."\
'In addition, it could not be synced so that this probably indicates a permanent error.'
)
end
end
end

View File

@ -9,10 +9,18 @@ class Runner::Strategy
raise NotImplementedError
end
def self.environments
raise NotImplementedError
end
def self.sync_environment(_environment)
raise NotImplementedError
end
def self.remove_environment(_environment)
raise NotImplementedError
end
def self.request_from_management(_environment)
raise NotImplementedError
end

View File

@ -14,17 +14,40 @@ class Runner::Strategy::DockerContainerPool < Runner::Strategy
FileUtils.mkdir_p(File.expand_path(config[:workspace_root]))
end
def self.sync_environment(environment)
# There is no dedicated sync mechanism yet. However, we need to emit a warning when the pool was previously
# empty for this execution environment. In this case the validation command probably was not executed.
return true unless environment.pool_size_previously_changed?
case environment.pool_size_previously_was
when nil, 0
false
else
true
def self.environments
pool_size.keys.map {|key| {id: key} }
end
def self.sync_environment(environment)
# Force a database commit and start a new transaction.
if environment.class.connection.transaction_open?
environment.class.connection.commit_db_transaction
environment.class.connection.begin_db_transaction
end
url = "#{config[:url]}/docker_container_pool/refill_environment/#{environment.id}"
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Refilling execution environment at #{url}" }
response = Faraday.post(url)
return true if response.success?
raise Runner::Error::UnexpectedResponse.new("Could not refill execution environment in DockerContainerPool, got response: #{response.as_json}")
rescue Faraday::Error => e
raise Runner::Error::FaradayError.new("Request to DockerContainerPool failed: #{e.inspect}")
ensure
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished refilling environment" }
end
def self.remove_environment(environment)
url = "#{config[:url]}/docker_container_pool/purge_environment/#{environment.id}"
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Purging execution environment at #{url}" }
response = Faraday.delete(url)
return true if response.success?
raise Runner::Error::UnexpectedResponse.new("Could not delete execution environment in DockerContainerPool, got response: #{response.as_json}")
rescue Faraday::Error => e
raise Runner::Error::FaradayError.new("Request to DockerContainerPool failed: #{e.inspect}")
ensure
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished purging environment" }
end
def self.request_from_management(environment)
@ -49,7 +72,10 @@ class Runner::Strategy::DockerContainerPool < Runner::Strategy
def destroy_at_management
url = "#{self.class.config[:url]}/docker_container_pool/destroy_container/#{container.id}"
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Destroying runner at #{url}" }
Faraday.delete(url)
response = Faraday.delete(url)
return true if response.success?
raise Runner::Error::UnexpectedResponse.new("Could not delete execution environment in DockerContainerPool, got response: #{response.as_json}")
rescue Faraday::Error => e
raise Runner::Error::FaradayError.new("Request to DockerContainerPool failed: #{e.inspect}")
ensure

View File

@ -5,7 +5,17 @@
class Runner::Strategy::Null < Runner::Strategy
def self.initialize_environment; end
def self.sync_environment(_environment); end
def self.environments
raise Runner::Error.new
end
def self.sync_environment(_environment)
raise Runner::Error.new
end
def self.remove_environment(_environment)
raise Runner::Error.new
end
def self.request_from_management(_environment)
SecureRandom.uuid

View File

@ -19,16 +19,51 @@ class Runner::Strategy::Poseidon < Runner::Strategy
nil
end
def self.environments
url = "#{config[:url]}/execution-environments"
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Getting list of execution environments at #{url}" }
response = http_connection.get url
case response.status
when 200
response_body = parse response
execution_environments = response_body[:executionEnvironments]
execution_environments.presence || raise(Runner::Error::UnexpectedResponse.new("Could not get the list of execution environments in Poseidon, got response: #{response.as_json}"))
when 404
raise Runner::Error::EnvironmentNotFound.new
else
handle_error response
end
rescue Faraday::Error => e
raise Runner::Error::FaradayError.new("Could not get the list of execution environments because of Faraday error: #{e.inspect}")
ensure
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished getting the list of execution environments" }
end
def self.sync_environment(environment)
url = "#{config[:url]}/execution-environments/#{environment.id}"
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Synchronizing execution environment at #{url}" }
response = http_connection.put url, environment.to_json
return true if [201, 204].include? response.status
Rails.logger.warn("Could not create execution environment in Poseidon, got response: #{response.as_json}")
false
raise Runner::Error::UnexpectedResponse.new("Could not synchronize execution environment in Poseidon, got response: #{response.as_json}")
rescue Faraday::Error => e
Rails.logger.warn("Could not create execution environment because of Faraday error: #{e.inspect}")
false
raise Runner::Error::FaradayError.new("Could not synchronize execution environment because of Faraday error: #{e.inspect}")
ensure
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished synchronizing execution environment" }
end
def self.remove_environment(environment)
url = "#{config[:url]}/execution-environments/#{environment.id}"
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Deleting execution environment at #{url}" }
response = http_connection.delete url
return true if response.status == 204
raise Runner::Error::UnexpectedResponse.new("Could not delete execution environment in Poseidon, got response: #{response.as_json}")
rescue Faraday::Error => e
raise Runner::Error::FaradayError.new("Could not delete execution environment because of Faraday error: #{e.inspect}")
ensure
Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished deleting execution environment" }
end
def self.request_from_management(environment)

View File

@ -8,15 +8,21 @@ describe ExecutionEnvironmentsController do
before do
allow(controller).to receive(:current_user).and_return(user)
allow(controller).to receive(:sync_to_runner_management).and_return(nil)
allow(Runner.strategy_class).to receive(:available_images).and_return([])
end
describe 'POST #create' do
context 'with a valid execution environment' do
let(:perform_request) { proc { post :create, params: {execution_environment: FactoryBot.build(:ruby).attributes} } }
let(:perform_request) { proc { post :create, params: {execution_environment: FactoryBot.build(:ruby, pool_size: 1).attributes} } }
before { perform_request.call }
before do
allow(Rails.env).to receive(:test?).and_return(false, true)
allow(Runner.strategy_class).to receive(:sync_environment).and_return(true)
runner = instance_double 'runner'
allow(Runner).to receive(:for).and_return(runner)
allow(runner).to receive(:execute_command).and_return({})
perform_request.call
end
expect_assigns(docker_images: Array)
expect_assigns(execution_environment: ExecutionEnvironment)
@ -26,27 +32,34 @@ describe ExecutionEnvironmentsController do
end
it 'registers the execution environment with the runner management' do
expect(controller).to have_received(:sync_to_runner_management)
expect(Runner.strategy_class).to have_received(:sync_environment)
end
expect_redirect(ExecutionEnvironment.last)
end
context 'with an invalid execution environment' do
before { post :create, params: {execution_environment: {}} }
before do
allow(Runner.strategy_class).to receive(:sync_environment).and_return(true)
allow(Rails.env).to receive(:test?).and_return(false, true)
post :create, params: {execution_environment: {}}
end
expect_assigns(execution_environment: ExecutionEnvironment)
expect_status(200)
expect_template(:new)
it 'does not register the execution environment with the runner management' do
expect(controller).not_to have_received(:sync_to_runner_management)
expect(Runner.strategy_class).not_to have_received(:sync_environment)
end
end
end
describe 'DELETE #destroy' do
before { delete :destroy, params: {id: execution_environment.id} }
before do
allow(Runner.strategy_class).to receive(:remove_environment).and_return(true)
delete :destroy, params: {id: execution_environment.id}
end
expect_assigns(execution_environment: :execution_environment)
@ -55,6 +68,10 @@ describe ExecutionEnvironmentsController do
expect { delete :destroy, params: {id: execution_environment.id} }.to change(ExecutionEnvironment, :count).by(-1)
end
it 'removes the execution environment from the runner management' do
expect(Runner.strategy_class).to have_received(:remove_environment)
end
expect_redirect(:execution_environments)
end
@ -164,8 +181,12 @@ describe ExecutionEnvironmentsController do
describe 'PUT #update' do
context 'with a valid execution environment' do
before do
allow(controller).to receive(:sync_to_runner_management).and_return(nil)
put :update, params: {execution_environment: FactoryBot.attributes_for(:ruby), id: execution_environment.id}
allow(Rails.env).to receive(:test?).and_return(false, true)
allow(Runner.strategy_class).to receive(:sync_environment).and_return(true)
runner = instance_double 'runner'
allow(Runner).to receive(:for).and_return(runner)
allow(runner).to receive(:execute_command).and_return({})
put :update, params: {execution_environment: FactoryBot.attributes_for(:ruby, pool_size: 1), id: execution_environment.id}
end
expect_assigns(docker_images: Array)
@ -173,25 +194,30 @@ describe ExecutionEnvironmentsController do
expect_redirect(:execution_environment)
it 'updates the execution environment at the runner management' do
expect(controller).to have_received(:sync_to_runner_management)
expect(Runner.strategy_class).to have_received(:sync_environment)
end
end
context 'with an invalid execution environment' do
before { put :update, params: {execution_environment: {name: ''}, id: execution_environment.id} }
before do
allow(Runner.strategy_class).to receive(:sync_environment).and_return(true)
allow(Rails.env).to receive(:test?).and_return(true, false, true)
put :update, params: {execution_environment: {name: ''}, id: execution_environment.id}
end
expect_assigns(execution_environment: ExecutionEnvironment)
expect_status(200)
expect_template(:edit)
it 'does not update the execution environment at the runner management' do
expect(controller).not_to have_received(:sync_to_runner_management)
expect(Runner.strategy_class).not_to have_received(:sync_environment)
end
end
end
describe '#sync_all_to_runner_management' do
let(:execution_environments) { FactoryBot.build_list(:ruby, 3) }
let(:execution_environments) { %i[ruby java python].map {|environment| FactoryBot.create(environment) } }
let(:outdated_execution_environments) { %i[node_js html].map {|environment| FactoryBot.build_stubbed(environment) } }
let(:codeocean_config) { instance_double(CodeOcean::Config) }
let(:runner_management_config) { {runner_management: {enabled: true, strategy: :poseidon}} }
@ -204,11 +230,19 @@ describe ExecutionEnvironmentsController do
end
it 'copies all execution environments to the runner management' do
allow(ExecutionEnvironment).to receive(:all).and_return(execution_environments)
allow(Runner::Strategy::Poseidon).to receive(:environments).and_return(outdated_execution_environments)
expect(Runner::Strategy::Poseidon).to receive(:environments).once
execution_environments.each do |execution_environment|
allow(Runner::Strategy::Poseidon).to receive(:sync_environment).with(execution_environment).and_return(true)
expect(Runner::Strategy::Poseidon).to receive(:sync_environment).with(execution_environment).once
expect(Runner::Strategy::Poseidon).not_to receive(:remove_environment).with(execution_environment)
end
outdated_execution_environments.each do |execution_environment|
allow(Runner::Strategy::Poseidon).to receive(:remove_environment).with(execution_environment).and_return(true)
expect(Runner::Strategy::Poseidon).to receive(:remove_environment).with(execution_environment).once
expect(Runner::Strategy::Poseidon).not_to receive(:sync_environment).with(execution_environment)
end
post :sync_all_to_runner_management

View File

@ -9,6 +9,16 @@ describe Runner::Strategy::Poseidon do
let(:error_message) { 'test error message' }
let(:response_body) { nil }
let(:codeocean_config) { instance_double(CodeOcean::Config) }
let(:runner_management_config) { {runner_management: {enabled: true, strategy: :poseidon, url: 'https://runners.example.org', unused_runner_expiration_time: 180}} }
before do
# Ensure to reset the memorized helper
Runner.instance_variable_set :@strategy_class, nil
allow(CodeOcean::Config).to receive(:new).with(:code_ocean).and_return(codeocean_config)
allow(codeocean_config).to receive(:read).and_return(runner_management_config)
end
# All requests handle a BadRequest (400) response the same way.
shared_examples 'BadRequest (400) error handling' do
context 'when Poseidon returns BadRequest (400)' do
@ -141,11 +151,11 @@ describe Runner::Strategy::Poseidon do
end
shared_examples 'returns false when the api request failed' do |status|
it "returns false on status #{status}" do
it "raises an exception on status #{status}" do
faraday_connection = instance_double 'Faraday::Connection'
allow(described_class).to receive(:http_connection).and_return(faraday_connection)
allow(faraday_connection).to receive(:put).and_return(Faraday::Response.new(status: status))
expect(action.call).to be_falsey
expect { action.call }.to raise_exception Runner::Error::UnexpectedResponse
end
end
@ -157,11 +167,11 @@ describe Runner::Strategy::Poseidon do
include_examples 'returns false when the api request failed', status
end
it 'returns false if Faraday raises an error' do
it 'raises an exception if Faraday raises an error' do
faraday_connection = instance_double 'Faraday::Connection'
allow(described_class).to receive(:http_connection).and_return(faraday_connection)
allow(faraday_connection).to receive(:put).and_raise(Faraday::TimeoutError)
expect(action.call).to be_falsey
expect { action.call }.to raise_exception Runner::Error::FaradayError
end
end

View File

@ -8,7 +8,7 @@ describe ExecutionEnvironment do
it 'validates that the Docker image works' do
allow(execution_environment).to receive(:validate_docker_image?).and_return(true)
allow(execution_environment).to receive(:working_docker_image?).and_return(true)
execution_environment.update(docker_image: FactoryBot.attributes_for(:ruby)[:docker_image])
execution_environment.update(FactoryBot.build(:ruby).attributes)
expect(execution_environment).to have_received(:working_docker_image?)
end
@ -138,18 +138,26 @@ describe ExecutionEnvironment do
expect(execution_environment.send(:validate_docker_image?)).to be false
end
it 'is false when the pool size is empty' do
expect(execution_environment.pool_size).to be 0
expect(execution_environment.send(:validate_docker_image?)).to be false
end
it 'is true otherwise' do
execution_environment.docker_image = FactoryBot.attributes_for(:ruby)[:docker_image]
execution_environment.pool_size = 1
allow(Rails.env).to receive(:test?).and_return(false)
expect(execution_environment.send(:validate_docker_image?)).to be true
end
end
describe '#working_docker_image?' do
let(:execution_environment) { FactoryBot.create(:ruby) }
let(:working_docker_image?) { execution_environment.send(:working_docker_image?) }
let(:runner) { instance_double 'runner' }
before do
allow(execution_environment).to receive(:sync_runner_environment).and_return(true)
allow(Runner).to receive(:for).with(execution_environment.author, execution_environment).and_return runner
end
@ -176,7 +184,7 @@ describe ExecutionEnvironment do
context 'when the Docker client produces an error' do
it 'adds an error' do
allow(runner).to receive(:execute_command).and_raise(Runner::Error)
working_docker_image?
expect { working_docker_image? }.to raise_error(ActiveRecord::RecordInvalid)
expect(execution_environment.errors[:docker_image]).to be_present
end
end

View File

@ -233,7 +233,7 @@ describe Runner do
end
it 'raises an error when the environment could not be synced' do
allow(strategy_class).to receive(:sync_environment).with(runner.execution_environment).and_return(false)
allow(strategy_class).to receive(:sync_environment).with(runner.execution_environment).and_raise(Runner::Error::EnvironmentNotFound)
expect { runner.send(:request_new_id) }.to raise_error(Runner::Error::EnvironmentNotFound, /#{environment_id}.*could not be synced/)
end
end