Improve error resilience and handling

Timeouts are now handled correctly and the Runner automatically
creates the execution environment if it could not be found in
Poseidon. The runner is deleted locally if Poseidon returns
a bad request error.
This commit is contained in:
Felix Auringer
2021-06-14 09:22:29 +02:00
committed by Sebastian Serth
parent b6bc578aea
commit 413f9b2705
7 changed files with 136 additions and 29 deletions

View File

@ -7,6 +7,7 @@ class Runner::Connection
# These are events for which callbacks can be registered.
EVENTS = %i[start output exit stdout stderr].freeze
# JSON schema built once, at class load time, from the schema file below.
# The path is relative, so it is resolved against the process working directory.
# NOTE(review): presumably used to validate messages received from the backend - confirm at the usage site.
BACKEND_OUTPUT_SCHEMA = JSONSchemer.schema(JSON.parse(File.read('lib/runner/backend-output.schema.json')))
# Value that handle_timeout stores in @exit_code before raising the timeout error.
TIMEOUT_EXIT_STATUS = -100
def initialize(url)
@socket = Faye::WebSocket::Client.new(url, [], ping: 5)
@ -20,7 +21,8 @@ class Runner::Connection
# This registers empty default callbacks.
EVENTS.each {|event_type| instance_variable_set(:"@#{event_type}_callback", ->(e) {}) }
@start_callback = -> {}
@exit_code = 0
# Fail if no exit status was returned.
@exit_code = 1
end
def on(event, &block)
@ -80,6 +82,7 @@ class Runner::Connection
# Handler with an empty body: it ignores its argument and does nothing.
# NOTE(review): presumably invoked for the :start entry of EVENTS - confirm in the message dispatch code.
def handle_start(_event); end
# Marks the execution as timed out and aborts it.
#
# The exit code is set to TIMEOUT_EXIT_STATUS before the error is raised,
# so the stored status stays available after the exception propagates.
#
# @param _event [Object] ignored
# @raise [Runner::Error::ExecutionTimeout] always
def handle_timeout(_event)
  @exit_code = TIMEOUT_EXIT_STATUS
  timeout_error = Runner::Error::ExecutionTimeout.new('Execution exceeded its time limit')
  raise timeout_error
end
end

View File

@ -10,6 +10,10 @@ class Runner::Strategy::Poseidon < Runner::Strategy
end
end
# Delegates to environment.copy_to_poseidon and returns whatever that call returns.
# NOTE(review): presumably this creates or updates the execution environment in Poseidon - confirm in copy_to_poseidon.
def self.sync_environment(environment)
environment.copy_to_poseidon
end
def self.request_from_management(environment)
url = "#{Runner::BASE_URL}/runners"
body = {executionEnvironmentId: environment.id, inactivityTimeout: Runner::UNUSED_EXPIRATION_TIME}
@ -21,10 +25,12 @@ class Runner::Strategy::Poseidon < Runner::Strategy
runner_id = response_body[:runnerId]
runner_id.presence || raise(Runner::Error::Unknown.new('Poseidon did not send a runner id'))
when 404
raise Runner::Error::NotFound.new('Execution environment not found')
raise Runner::Error::EnvironmentNotFound.new
else
handle_error response
end
rescue Faraday::Error => e
raise Runner::Error::Unknown.new("Faraday request to runner management failed: #{e.inspect}")
end
def self.handle_error(response)
@ -35,7 +41,7 @@ class Runner::Strategy::Poseidon < Runner::Strategy
when 401
raise Runner::Error::Unauthorized.new('Authentication with Poseidon failed')
when 404
raise Runner::Error::NotFound.new('Runner not found')
raise Runner::Error::RunnerNotFound.new
when 500
response_body = parse response
error_code = response_body[:errorCode]
@ -60,7 +66,12 @@ class Runner::Strategy::Poseidon < Runner::Strategy
url = "#{runner_url}/files"
body = {copy: files.map {|filename, content| {path: filename, content: Base64.strict_encode64(content)} }}
response = Faraday.patch(url, body.to_json, HEADERS)
self.class.handle_error response unless response.status == 204
return if response.status == 204
Runner.destroy(@runner_id) if response.status == 400
self.class.handle_error response
rescue Faraday::Error => e
raise Runner::Error::Unknown.new("Faraday request to runner management failed: #{e.inspect}")
end
def attach_to_execution(command)
@ -68,7 +79,7 @@ class Runner::Strategy::Poseidon < Runner::Strategy
websocket_url = execute_command(command)
EventMachine.run do
socket = Runner::Connection.new(websocket_url)
yield(socket) if block_given?
yield(socket)
end
Time.zone.now - starting_time # execution duration
end
@ -76,6 +87,8 @@ class Runner::Strategy::Poseidon < Runner::Strategy
# Sends a DELETE request for this runner's URL to the runner management.
#
# A 204 response counts as success and yields nil. Any other response is
# handed to the class-level handle_error, whose result is returned.
#
# @raise [Runner::Error::Unknown] if the Faraday request itself fails
def destroy_at_management
  response = Faraday.delete(runner_url)
  return if response.status == 204

  self.class.handle_error(response)
rescue Faraday::Error => e
  raise Runner::Error::Unknown.new("Faraday request to runner management failed: #{e.inspect}")
end
private
@ -84,17 +97,22 @@ class Runner::Strategy::Poseidon < Runner::Strategy
url = "#{runner_url}/execute"
body = {command: command, timeLimit: @execution_environment.permitted_execution_time}
response = Faraday.post(url, body.to_json, HEADERS)
if response.status == 200
response_body = self.class.parse response
websocket_url = response_body[:websocketUrl]
if websocket_url.present?
return websocket_url
else
raise Runner::Error::Unknown.new('Poseidon did not send websocket url')
end
case response.status
when 200
response_body = self.class.parse response
websocket_url = response_body[:websocketUrl]
if websocket_url.present?
return websocket_url
else
raise Runner::Error::Unknown.new('Poseidon did not send websocket url')
end
when 400
Runner.destroy(@runner_id)
end
self.class.handle_error response
rescue Faraday::Error => e
raise Runner::Error::Unknown.new("Faraday request to runner management failed: #{e.inspect}")
end
def runner_url