Handle OutOfMemory runner errors with gVisor
This commit is contained in:

committed by
Sebastian Serth

parent
567694fe03
commit
fd2d94568a
@ -99,8 +99,7 @@ class SubmissionsController < ApplicationController
|
||||
end
|
||||
end
|
||||
|
||||
# rubocop:disable Metrics/CyclomaticComplexity
|
||||
def run
|
||||
def run # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
||||
# These method-local socket variables are required in order to use one socket
|
||||
# in the callbacks of the other socket. As the callbacks for the client socket
|
||||
# are registered first, the runner socket may still be nil.
|
||||
@ -199,12 +198,6 @@ class SubmissionsController < ApplicationController
|
||||
end
|
||||
stream = @testrun[:status] == :ok ? :stdout : :stderr
|
||||
send_and_store client_socket, {cmd: :write, stream:, data: "#{exit_statement}\n"}
|
||||
if exit_code == 137
|
||||
send_and_store client_socket, {cmd: :status, status: :out_of_memory}
|
||||
@testrun[:status] = :out_of_memory
|
||||
end
|
||||
|
||||
# The client connection will be closed once the file listing finished.
|
||||
end
|
||||
|
||||
runner_socket.on :files do |files|
|
||||
@ -213,30 +206,33 @@ class SubmissionsController < ApplicationController
|
||||
js_tree = FileTree.new(downloadable_files).to_js_tree
|
||||
send_and_store client_socket, {cmd: :files, data: js_tree}
|
||||
end
|
||||
|
||||
close_client_connection(client_socket)
|
||||
end
|
||||
end
|
||||
@testrun[:container_execution_time] = durations[:execution_duration]
|
||||
@testrun[:waiting_for_container_time] = durations[:waiting_duration]
|
||||
rescue Runner::Error::ExecutionTimeout => e
|
||||
send_and_store client_socket, {cmd: :status, status: :timeout}
|
||||
close_client_connection(client_socket)
|
||||
Rails.logger.debug { "Running a submission timed out: #{e.message}" }
|
||||
@testrun[:status] ||= :timeout
|
||||
@testrun[:output] = "timeout: #{@testrun[:output]}"
|
||||
extract_durations(e)
|
||||
rescue Runner::Error::OutOfMemory => e
|
||||
send_and_store client_socket, {cmd: :status, status: :out_of_memory}
|
||||
Rails.logger.debug { "Running a submission caused an out of memory error: #{e.message}" }
|
||||
@testrun[:status] ||= :out_of_memory
|
||||
@testrun[:exit_code] ||= 137
|
||||
@testrun[:output] = "out_of_memory: #{@testrun[:output]}"
|
||||
extract_durations(e)
|
||||
rescue Runner::Error => e
|
||||
# Regardless of the specific error cause, we send a `container_depleted` status to the client.
|
||||
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
||||
close_client_connection(client_socket)
|
||||
@testrun[:status] ||= :container_depleted
|
||||
Rails.logger.debug { "Runner error while running a submission: #{e.message}" }
|
||||
extract_durations(e)
|
||||
ensure
|
||||
close_client_connection(client_socket)
|
||||
save_testrun_output 'run'
|
||||
end
|
||||
# rubocop:enable Metrics/CyclomaticComplexity:
|
||||
|
||||
def score
|
||||
client_socket = nil
|
||||
@ -256,14 +252,14 @@ class SubmissionsController < ApplicationController
|
||||
client_socket&.send_data(JSON.dump(@submission.calculate_score))
|
||||
# To enable hints when scoring a submission, uncomment the next line:
|
||||
# send_hints(client_socket, StructuredError.where(submission: @submission))
|
||||
kill_client_socket(client_socket)
|
||||
rescue Runner::Error => e
|
||||
extract_durations(e)
|
||||
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
||||
kill_client_socket(client_socket)
|
||||
Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" }
|
||||
@testrun[:passed] = false
|
||||
save_testrun_output 'assess'
|
||||
ensure
|
||||
kill_client_socket(client_socket)
|
||||
end
|
||||
|
||||
def create
|
||||
@ -289,7 +285,6 @@ class SubmissionsController < ApplicationController
|
||||
|
||||
# The score is stored separately, we can forward it to the client immediately
|
||||
client_socket&.send_data(JSON.dump(@submission.test(@file)))
|
||||
kill_client_socket(client_socket)
|
||||
rescue Runner::Error => e
|
||||
extract_durations(e)
|
||||
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
||||
@ -297,6 +292,8 @@ class SubmissionsController < ApplicationController
|
||||
Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" }
|
||||
@testrun[:passed] = false
|
||||
save_testrun_output 'assess'
|
||||
ensure
|
||||
kill_client_socket(client_socket)
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -24,6 +24,8 @@ class Runner
|
||||
|
||||
class WorkspaceError < Error; end
|
||||
|
||||
class OutOfMemory < Error; end
|
||||
|
||||
class Unknown < Error; end
|
||||
end
|
||||
end
|
||||
|
@ -137,6 +137,9 @@ class Runner < ApplicationRecord
|
||||
rescue Runner::Error::ExecutionTimeout => e
|
||||
Rails.logger.debug { "Running command `#{command}` timed out: #{e.message}" }
|
||||
output.merge!(status: :timeout, container_execution_time: e.execution_duration)
|
||||
rescue Runner::Error::OutOfMemory => e
|
||||
Rails.logger.debug { "Running command `#{command}` caused an out of memory error: #{e.message}" }
|
||||
output.merge!(status: :out_of_memory, container_execution_time: e.execution_duration)
|
||||
rescue Runner::Error::RunnerNotFound => e
|
||||
Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" }
|
||||
try += 1
|
||||
|
@ -161,9 +161,19 @@ class Runner::Connection
|
||||
# However, it might not be required for Poseidon.
|
||||
@strategy.destroy_at_management
|
||||
@error = Runner::Error::ExecutionTimeout.new('Execution exceeded its time limit')
|
||||
when :out_of_memory
|
||||
# This status is only used by Poseidon (with gVisor).
|
||||
# The runner will be destroyed (and recreated) automatically.
|
||||
@error = Runner::Error::OutOfMemory.new('Execution exceeded its memory limit')
|
||||
when :terminated_by_codeocean, :terminated_by_management
|
||||
@exit_callback.call @exit_code
|
||||
list_filesystem
|
||||
# Poseidon (without gVisor) and DockerContainerPool do not handle memory limits explicitly.
|
||||
# Instead, they signal that the program was terminated with exit code 137 (128 + 9).
|
||||
if @exit_code == 137
|
||||
@error = Runner::Error::OutOfMemory.new('Execution exceeded its memory limit')
|
||||
else
|
||||
@exit_callback.call @exit_code
|
||||
list_filesystem
|
||||
end
|
||||
when :terminated_by_client, :error
|
||||
@strategy.destroy_at_management
|
||||
else # :established
|
||||
@ -223,6 +233,11 @@ class Runner::Connection
|
||||
end
|
||||
|
||||
def handle_error(event)
|
||||
# Poseidon (with gVisor enabled!) sends an error message when the execution exceeds its memory limit.
|
||||
# This is not an error in the sense of the runner management but rather a message.
|
||||
# We handle it here to avoid the error handling in the default case.
|
||||
return @status = :out_of_memory if event[:data] == 'the allocation was OOM Killed'
|
||||
|
||||
# In case of a (Nomad) error during execution, the runner management will notify us with an error message here.
|
||||
# This shouldn't happen too often and can be considered an internal server error by the runner management.
|
||||
# More information is available in the logs of the runner management or the orchestrator (e.g., Nomad).
|
||||
|
@ -161,6 +161,7 @@ describe SubmissionsController do
|
||||
context 'when no errors occur during execution' do
|
||||
before do
|
||||
allow_any_instance_of(described_class).to receive(:hijack)
|
||||
allow_any_instance_of(described_class).to receive(:close_client_connection)
|
||||
allow_any_instance_of(Submission).to receive(:run).and_return({})
|
||||
allow_any_instance_of(described_class).to receive(:save_testrun_output)
|
||||
perform_request
|
||||
|
Reference in New Issue
Block a user