Handle OutOfMemory runner errors with gVisor
This commit is contained in:

committed by
Sebastian Serth

parent
567694fe03
commit
fd2d94568a
@ -99,8 +99,7 @@ class SubmissionsController < ApplicationController
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# rubocop:disable Metrics/CyclomaticComplexity
|
def run # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
||||||
def run
|
|
||||||
# These method-local socket variables are required in order to use one socket
|
# These method-local socket variables are required in order to use one socket
|
||||||
# in the callbacks of the other socket. As the callbacks for the client socket
|
# in the callbacks of the other socket. As the callbacks for the client socket
|
||||||
# are registered first, the runner socket may still be nil.
|
# are registered first, the runner socket may still be nil.
|
||||||
@ -199,12 +198,6 @@ class SubmissionsController < ApplicationController
|
|||||||
end
|
end
|
||||||
stream = @testrun[:status] == :ok ? :stdout : :stderr
|
stream = @testrun[:status] == :ok ? :stdout : :stderr
|
||||||
send_and_store client_socket, {cmd: :write, stream:, data: "#{exit_statement}\n"}
|
send_and_store client_socket, {cmd: :write, stream:, data: "#{exit_statement}\n"}
|
||||||
if exit_code == 137
|
|
||||||
send_and_store client_socket, {cmd: :status, status: :out_of_memory}
|
|
||||||
@testrun[:status] = :out_of_memory
|
|
||||||
end
|
|
||||||
|
|
||||||
# The client connection will be closed once the file listing finished.
|
|
||||||
end
|
end
|
||||||
|
|
||||||
runner_socket.on :files do |files|
|
runner_socket.on :files do |files|
|
||||||
@ -213,30 +206,33 @@ class SubmissionsController < ApplicationController
|
|||||||
js_tree = FileTree.new(downloadable_files).to_js_tree
|
js_tree = FileTree.new(downloadable_files).to_js_tree
|
||||||
send_and_store client_socket, {cmd: :files, data: js_tree}
|
send_and_store client_socket, {cmd: :files, data: js_tree}
|
||||||
end
|
end
|
||||||
|
|
||||||
close_client_connection(client_socket)
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@testrun[:container_execution_time] = durations[:execution_duration]
|
@testrun[:container_execution_time] = durations[:execution_duration]
|
||||||
@testrun[:waiting_for_container_time] = durations[:waiting_duration]
|
@testrun[:waiting_for_container_time] = durations[:waiting_duration]
|
||||||
rescue Runner::Error::ExecutionTimeout => e
|
rescue Runner::Error::ExecutionTimeout => e
|
||||||
send_and_store client_socket, {cmd: :status, status: :timeout}
|
send_and_store client_socket, {cmd: :status, status: :timeout}
|
||||||
close_client_connection(client_socket)
|
|
||||||
Rails.logger.debug { "Running a submission timed out: #{e.message}" }
|
Rails.logger.debug { "Running a submission timed out: #{e.message}" }
|
||||||
@testrun[:status] ||= :timeout
|
@testrun[:status] ||= :timeout
|
||||||
@testrun[:output] = "timeout: #{@testrun[:output]}"
|
@testrun[:output] = "timeout: #{@testrun[:output]}"
|
||||||
extract_durations(e)
|
extract_durations(e)
|
||||||
|
rescue Runner::Error::OutOfMemory => e
|
||||||
|
send_and_store client_socket, {cmd: :status, status: :out_of_memory}
|
||||||
|
Rails.logger.debug { "Running a submission caused an out of memory error: #{e.message}" }
|
||||||
|
@testrun[:status] ||= :out_of_memory
|
||||||
|
@testrun[:exit_code] ||= 137
|
||||||
|
@testrun[:output] = "out_of_memory: #{@testrun[:output]}"
|
||||||
|
extract_durations(e)
|
||||||
rescue Runner::Error => e
|
rescue Runner::Error => e
|
||||||
# Regardless of the specific error cause, we send a `container_depleted` status to the client.
|
# Regardless of the specific error cause, we send a `container_depleted` status to the client.
|
||||||
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
||||||
close_client_connection(client_socket)
|
|
||||||
@testrun[:status] ||= :container_depleted
|
@testrun[:status] ||= :container_depleted
|
||||||
Rails.logger.debug { "Runner error while running a submission: #{e.message}" }
|
Rails.logger.debug { "Runner error while running a submission: #{e.message}" }
|
||||||
extract_durations(e)
|
extract_durations(e)
|
||||||
ensure
|
ensure
|
||||||
|
close_client_connection(client_socket)
|
||||||
save_testrun_output 'run'
|
save_testrun_output 'run'
|
||||||
end
|
end
|
||||||
# rubocop:enable Metrics/CyclomaticComplexity:
|
|
||||||
|
|
||||||
def score
|
def score
|
||||||
client_socket = nil
|
client_socket = nil
|
||||||
@ -256,14 +252,14 @@ class SubmissionsController < ApplicationController
|
|||||||
client_socket&.send_data(JSON.dump(@submission.calculate_score))
|
client_socket&.send_data(JSON.dump(@submission.calculate_score))
|
||||||
# To enable hints when scoring a submission, uncomment the next line:
|
# To enable hints when scoring a submission, uncomment the next line:
|
||||||
# send_hints(client_socket, StructuredError.where(submission: @submission))
|
# send_hints(client_socket, StructuredError.where(submission: @submission))
|
||||||
kill_client_socket(client_socket)
|
|
||||||
rescue Runner::Error => e
|
rescue Runner::Error => e
|
||||||
extract_durations(e)
|
extract_durations(e)
|
||||||
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
||||||
kill_client_socket(client_socket)
|
|
||||||
Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" }
|
Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" }
|
||||||
@testrun[:passed] = false
|
@testrun[:passed] = false
|
||||||
save_testrun_output 'assess'
|
save_testrun_output 'assess'
|
||||||
|
ensure
|
||||||
|
kill_client_socket(client_socket)
|
||||||
end
|
end
|
||||||
|
|
||||||
def create
|
def create
|
||||||
@ -289,7 +285,6 @@ class SubmissionsController < ApplicationController
|
|||||||
|
|
||||||
# The score is stored separately, we can forward it to the client immediately
|
# The score is stored separately, we can forward it to the client immediately
|
||||||
client_socket&.send_data(JSON.dump(@submission.test(@file)))
|
client_socket&.send_data(JSON.dump(@submission.test(@file)))
|
||||||
kill_client_socket(client_socket)
|
|
||||||
rescue Runner::Error => e
|
rescue Runner::Error => e
|
||||||
extract_durations(e)
|
extract_durations(e)
|
||||||
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
send_and_store client_socket, {cmd: :status, status: :container_depleted}
|
||||||
@ -297,6 +292,8 @@ class SubmissionsController < ApplicationController
|
|||||||
Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" }
|
Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" }
|
||||||
@testrun[:passed] = false
|
@testrun[:passed] = false
|
||||||
save_testrun_output 'assess'
|
save_testrun_output 'assess'
|
||||||
|
ensure
|
||||||
|
kill_client_socket(client_socket)
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -24,6 +24,8 @@ class Runner
|
|||||||
|
|
||||||
class WorkspaceError < Error; end
|
class WorkspaceError < Error; end
|
||||||
|
|
||||||
|
class OutOfMemory < Error; end
|
||||||
|
|
||||||
class Unknown < Error; end
|
class Unknown < Error; end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -137,6 +137,9 @@ class Runner < ApplicationRecord
|
|||||||
rescue Runner::Error::ExecutionTimeout => e
|
rescue Runner::Error::ExecutionTimeout => e
|
||||||
Rails.logger.debug { "Running command `#{command}` timed out: #{e.message}" }
|
Rails.logger.debug { "Running command `#{command}` timed out: #{e.message}" }
|
||||||
output.merge!(status: :timeout, container_execution_time: e.execution_duration)
|
output.merge!(status: :timeout, container_execution_time: e.execution_duration)
|
||||||
|
rescue Runner::Error::OutOfMemory => e
|
||||||
|
Rails.logger.debug { "Running command `#{command}` caused an out of memory error: #{e.message}" }
|
||||||
|
output.merge!(status: :out_of_memory, container_execution_time: e.execution_duration)
|
||||||
rescue Runner::Error::RunnerNotFound => e
|
rescue Runner::Error::RunnerNotFound => e
|
||||||
Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" }
|
Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" }
|
||||||
try += 1
|
try += 1
|
||||||
|
@ -161,9 +161,19 @@ class Runner::Connection
|
|||||||
# However, it might not be required for Poseidon.
|
# However, it might not be required for Poseidon.
|
||||||
@strategy.destroy_at_management
|
@strategy.destroy_at_management
|
||||||
@error = Runner::Error::ExecutionTimeout.new('Execution exceeded its time limit')
|
@error = Runner::Error::ExecutionTimeout.new('Execution exceeded its time limit')
|
||||||
|
when :out_of_memory
|
||||||
|
# This status is only used by Poseidon (with gVisor).
|
||||||
|
# The runner will be destroyed (and recreated) automatically.
|
||||||
|
@error = Runner::Error::OutOfMemory.new('Execution exceeded its memory limit')
|
||||||
when :terminated_by_codeocean, :terminated_by_management
|
when :terminated_by_codeocean, :terminated_by_management
|
||||||
|
# Poseidon (without gVisor) and DockerContainerPool do not handle memory limits explicitly.
|
||||||
|
# Instead, they signal that the program was terminated with exit code 137 (128 + 9).
|
||||||
|
if @exit_code == 137
|
||||||
|
@error = Runner::Error::OutOfMemory.new('Execution exceeded its memory limit')
|
||||||
|
else
|
||||||
@exit_callback.call @exit_code
|
@exit_callback.call @exit_code
|
||||||
list_filesystem
|
list_filesystem
|
||||||
|
end
|
||||||
when :terminated_by_client, :error
|
when :terminated_by_client, :error
|
||||||
@strategy.destroy_at_management
|
@strategy.destroy_at_management
|
||||||
else # :established
|
else # :established
|
||||||
@ -223,6 +233,11 @@ class Runner::Connection
|
|||||||
end
|
end
|
||||||
|
|
||||||
def handle_error(event)
|
def handle_error(event)
|
||||||
|
# Poseidon (with gVisor enabled!) sends an error message when the execution exceeds its memory limit.
|
||||||
|
# This is not an error in the sense of the runner management but rather a message.
|
||||||
|
# We handle it here to avoid the error handling in the default case.
|
||||||
|
return @status = :out_of_memory if event[:data] == 'the allocation was OOM Killed'
|
||||||
|
|
||||||
# In case of a (Nomad) error during execution, the runner management will notify us with an error message here.
|
# In case of a (Nomad) error during execution, the runner management will notify us with an error message here.
|
||||||
# This shouldn't happen too often and can be considered an internal server error by the runner management.
|
# This shouldn't happen too often and can be considered an internal server error by the runner management.
|
||||||
# More information is available in the logs of the runner management or the orchestrator (e.g., Nomad).
|
# More information is available in the logs of the runner management or the orchestrator (e.g., Nomad).
|
||||||
|
@ -161,6 +161,7 @@ describe SubmissionsController do
|
|||||||
context 'when no errors occur during execution' do
|
context 'when no errors occur during execution' do
|
||||||
before do
|
before do
|
||||||
allow_any_instance_of(described_class).to receive(:hijack)
|
allow_any_instance_of(described_class).to receive(:hijack)
|
||||||
|
allow_any_instance_of(described_class).to receive(:close_client_connection)
|
||||||
allow_any_instance_of(Submission).to receive(:run).and_return({})
|
allow_any_instance_of(Submission).to receive(:run).and_return({})
|
||||||
allow_any_instance_of(described_class).to receive(:save_testrun_output)
|
allow_any_instance_of(described_class).to receive(:save_testrun_output)
|
||||||
perform_request
|
perform_request
|
||||||
|
Reference in New Issue
Block a user