From d5b274c9f29a283dfcfcd5f0866cea152cf2c99c Mon Sep 17 00:00:00 2001 From: Felix Auringer <48409110+felixauringer@users.noreply.github.com> Date: Thu, 27 May 2021 10:21:25 +0200 Subject: [PATCH] Introduce new error types for runners The errors are raised in the runner model and in the runner connection class. In the submission controller the errors are rescued and, depending on the error, the status timeout / container depleted is sent to the client. --- app/controllers/submissions_controller.rb | 41 ++++--- app/errors/runner/error.rb | 3 + app/errors/runner/error/bad_request.rb | 3 + app/errors/runner/error/execution_timeout.rb | 3 + .../runner/error/internal_server_error.rb | 3 + app/errors/runner/error/not_available.rb | 3 + app/errors/runner/error/not_found.rb | 3 + app/errors/runner/error/unauthorized.rb | 3 + app/errors/runner/error/unknown.rb | 3 + app/errors/runner_not_available_error.rb | 4 - app/models/runner.rb | 106 +++++++++++++----- lib/runner/connection.rb | 15 ++- 12 files changed, 137 insertions(+), 53 deletions(-) create mode 100644 app/errors/runner/error.rb create mode 100644 app/errors/runner/error/bad_request.rb create mode 100644 app/errors/runner/error/execution_timeout.rb create mode 100644 app/errors/runner/error/internal_server_error.rb create mode 100644 app/errors/runner/error/not_available.rb create mode 100644 app/errors/runner/error/not_found.rb create mode 100644 app/errors/runner/error/unauthorized.rb create mode 100644 app/errors/runner/error/unknown.rb delete mode 100644 app/errors/runner_not_available_error.rb diff --git a/app/controllers/submissions_controller.rb b/app/controllers/submissions_controller.rb index 77cafbb8..93bdab1d 100644 --- a/app/controllers/submissions_controller.rb +++ b/app/controllers/submissions_controller.rb @@ -133,7 +133,7 @@ class SubmissionsController < ApplicationController end def handle_websockets(tubesock, runner, socket) - tubesock.send_data JSON.dump({'cmd' => 'status', 'status' => :container_running}) + tubesock.send_data JSON.dump({cmd: :status, status: :container_running}) @output = +'' socket.on :output do |data| @@ -182,21 +182,21 @@ class SubmissionsController < ApplicationController def run hijack do |tubesock| - if @embed_options[:disable_run] - kill_socket(tubesock) - else - begin - @container_execution_time = @submission.run(sanitize_filename) do |runner, socket| - @waiting_for_container_time = runner.waiting_time - handle_websockets(tubesock, runner, socket) - end - save_run_output - rescue RunnerNotAvailableError - tubesock.send_data JSON.dump({cmd: :timeout}) - kill_socket(tubesock) - Rails.logger.debug('Runner not available') - end + return kill_socket(tubesock) if @embed_options[:disable_run] + + @container_execution_time = @submission.run(sanitize_filename) do |runner, socket| + @waiting_for_container_time = runner.waiting_time + handle_websockets(tubesock, runner, socket) end + save_run_output + rescue Runner::Error::ExecutionTimeout => e + tubesock.send_data JSON.dump({cmd: :status, status: :timeout}) + kill_socket(tubesock) + Rails.logger.debug { "Running a submission failed: #{e.message}" } + rescue Runner::Error => e + tubesock.send_data JSON.dump({cmd: :status, status: :container_depleted}) + kill_socket(tubesock) + Rails.logger.debug { "Runner error while running a submission: #{e.message}" } end end @@ -244,8 +244,15 @@ class SubmissionsController < ApplicationController tubesock.send_data(@submission.calculate_score) # To enable hints when scoring a submission, uncomment the next line: # send_hints(tubesock, StructuredError.where(submission: @submission)) - - tubesock.send_data JSON.dump({'cmd' => 'exit'}) + rescue Runner::Error::ExecutionTimeout => e + tubesock.send_data JSON.dump({cmd: :status, status: :timeout}) + Rails.logger.debug { "Running a submission failed: #{e.message}" } + rescue Runner::Error => e + tubesock.send_data JSON.dump({cmd: :status, status: :container_depleted}) + Rails.logger.debug { "Runner error while scoring a submission: #{e.message}" } + ensure + tubesock.send_data JSON.dump({cmd: :exit}) + tubesock.close end ensure ActiveRecord::Base.connection_pool.release_connection diff --git a/app/errors/runner/error.rb b/app/errors/runner/error.rb new file mode 100644 index 00000000..3470f553 --- /dev/null +++ b/app/errors/runner/error.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error < ApplicationError; end diff --git a/app/errors/runner/error/bad_request.rb b/app/errors/runner/error/bad_request.rb new file mode 100644 index 00000000..98b8f5ca --- /dev/null +++ b/app/errors/runner/error/bad_request.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::BadRequest < Runner::Error; end diff --git a/app/errors/runner/error/execution_timeout.rb b/app/errors/runner/error/execution_timeout.rb new file mode 100644 index 00000000..c10806a5 --- /dev/null +++ b/app/errors/runner/error/execution_timeout.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::ExecutionTimeout < Runner::Error; end diff --git a/app/errors/runner/error/internal_server_error.rb b/app/errors/runner/error/internal_server_error.rb new file mode 100644 index 00000000..68f3f0b8 --- /dev/null +++ b/app/errors/runner/error/internal_server_error.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::InternalServerError < Runner::Error; end diff --git a/app/errors/runner/error/not_available.rb b/app/errors/runner/error/not_available.rb new file mode 100644 index 00000000..4785ea59 --- /dev/null +++ b/app/errors/runner/error/not_available.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::NotAvailable < Runner::Error; end diff --git a/app/errors/runner/error/not_found.rb b/app/errors/runner/error/not_found.rb new file mode 100644 index 00000000..329bcc71 --- /dev/null +++ b/app/errors/runner/error/not_found.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::NotFound < Runner::Error; end diff --git a/app/errors/runner/error/unauthorized.rb b/app/errors/runner/error/unauthorized.rb new file mode 100644 index 00000000..8bccf7fa --- /dev/null +++ b/app/errors/runner/error/unauthorized.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::Unauthorized < Runner::Error; end diff --git a/app/errors/runner/error/unknown.rb b/app/errors/runner/error/unknown.rb new file mode 100644 index 00000000..6e1a1fce --- /dev/null +++ b/app/errors/runner/error/unknown.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::Unknown < Runner::Error; end diff --git a/app/errors/runner_not_available_error.rb b/app/errors/runner_not_available_error.rb deleted file mode 100644 index 4390fad4..00000000 --- a/app/errors/runner_not_available_error.rb +++ /dev/null @@ -1,4 +0,0 @@ -# frozen_string_literal: true - -class RunnerNotAvailableError < ApplicationError -end diff --git a/app/models/runner.rb b/app/models/runner.rb index eec2a1e6..f8861a2a 100644 --- a/app/models/runner.rb +++ b/app/models/runner.rb @@ -4,51 +4,60 @@ class Runner < ApplicationRecord BASE_URL = CodeOcean::Config.new(:code_ocean).read[:runner_management][:url] HEADERS = {'Content-Type' => 'application/json'}.freeze UNUSED_EXPIRATION_TIME = CodeOcean::Config.new(:code_ocean).read[:runner_management][:unused_runner_expiration_time].seconds + ERRORS = %w[NOMAD_UNREACHABLE NOMAD_OVERLOAD NOMAD_INTERNAL_SERVER_ERROR UNKNOWN].freeze + + ERRORS.each do |error| + define_singleton_method :"error_#{error.downcase}" do + error + end + end belongs_to :execution_environment belongs_to :user, polymorphic: true - before_create :new_runner - before_destroy :destroy_runner + before_validation :request_remotely - validates :execution_environment, presence: true - validates :user, presence: true + validates :execution_environment, :user, :runner_id, presence: true def self.for(user, exercise) execution_environment = ExecutionEnvironment.find(exercise.execution_environment_id) runner = find_or_create_by(user: user, execution_environment: execution_environment) - return runner if runner.save + unless runner.persisted? + # runner was not saved in the database (was not valid) + raise Runner::Error::InternalServerError.new("Provided runner could not be saved: #{runner.errors.inspect}") + end - raise RunnerNotAvailableError.new('No runner available') + runner end def copy_files(files) url = "#{runner_url}/files" body = {copy: files.map {|filename, content| {path: filename, content: Base64.strict_encode64(content)} }} response = Faraday.patch(url, body.to_json, HEADERS) - return unless response.status == 404 - - # runner has disappeared for some reason - destroy - raise RunnerNotAvailableError.new('Runner unavailable') + handle_error response unless response.status == 204 end def execute_command(command) url = "#{runner_url}/execute" body = {command: command, timeLimit: execution_environment.permitted_execution_time} response = Faraday.post(url, body.to_json, HEADERS) - if response.status == 404 - # runner has disappeared for some reason - destroy - raise RunnerNotAvailableError.new('Runner unavailable') + if response.status == 200 + response_body = parse response + websocket_url = response_body[:websocketUrl] + if websocket_url.present? + return websocket_url + else + raise Runner::Error::Unknown.new('Runner management sent unexpected response') + end end - parse response + + handle_error response end def execute_interactively(command) starting_time = Time.zone.now - websocket_url = execute_command(command)[:websocketUrl] + websocket_url = execute_command(command) EventMachine.run do socket = Runner::Connection.new(websocket_url) yield(self, socket) if block_given? @@ -56,24 +65,64 @@ class Runner < ApplicationRecord Time.zone.now - starting_time # execution time end - def destroy_runner - Faraday.delete runner_url - end + # This method is currently not used. + # This does *not* destroy the ActiveRecord model. + def destroy_remotely + response = Faraday.delete runner_url + return if response.status == 204 - def status - # TODO: return actual state retrieved via websocket - :timeouted + if response.status == 404 + raise Runner::Error::NotFound.new('Runner not found') + else + handle_error response + end end private - def new_runner + def request_remotely + return if runner_id.present? + url = "#{BASE_URL}/runners" body = {executionEnvironmentId: execution_environment.id, inactivityTimeout: UNUSED_EXPIRATION_TIME} response = Faraday.post(url, body.to_json, HEADERS) - response_body = parse response - self.runner_id = response_body[:runnerId] - throw :abort unless response.status == 200 + + case response.status + when 200 + response_body = parse response + runner_id = response_body[:runnerId] + throw(:abort) if runner_id.blank? + self.runner_id = response_body[:runnerId] + when 404 + raise Runner::Error::NotFound.new('Execution environment not found') + else + handle_error response + end + end + + def handle_error(response) + case response.status + when 400 + response_body = parse response + raise Runner::Error::BadRequest.new(response_body[:message]) + when 401 + raise Runner::Error::Unauthorized.new('Authentication with runner management failed') + when 404 + # The runner does not exist in the runner management (e.g. due to an inactivity timeout). + # Delete the runner model in this case as it can not be used anymore. + destroy + raise Runner::Error::NotFound.new('Runner not found') + when 500 + response_body = parse response + error_code = response_body[:errorCode] + if error_code == Runner.error_nomad_overload + raise Runner::Error::NotAvailable.new("No runner available (#{error_code}): #{response_body[:message]}") + else + raise Runner::Error::InternalServerError.new("#{response_body[:errorCode]}: #{response_body[:message]}") + end + else + raise Runner::Error::Unknown.new('Runner management sent unexpected response') + end end def runner_url @@ -82,5 +131,8 @@ class Runner < ApplicationRecord def parse(response) JSON.parse(response.body).deep_symbolize_keys + rescue JSON::ParserError => e + # the runner management should not send invalid json + raise Runner::Error::Unknown.new("Error parsing response from runner management: #{e.message}") end end diff --git a/lib/runner/connection.rb b/lib/runner/connection.rb index ea3840a6..55378e04 100644 --- a/lib/runner/connection.rb +++ b/lib/runner/connection.rb @@ -4,16 +4,20 @@ require 'faye/websocket/client' require 'json_schemer' class Runner::Connection + # These are events for which callbacks can be registered. EVENTS = %i[start output exit stdout stderr].freeze BACKEND_OUTPUT_SCHEMA = JSONSchemer.schema(JSON.parse(File.read('lib/runner/backend-output.schema.json'))) def initialize(url) @socket = Faye::WebSocket::Client.new(url, [], ping: 5) + # For every event type of faye websockets, the corresponding + # RunnerConnection method starting with `on_` is called. %i[open message error close].each do |event_type| @socket.on(event_type) {|event| __send__(:"on_#{event_type}", event) } end + # This registers empty default callbacks. EVENTS.each {|event_type| instance_variable_set(:"@#{event_type}_callback", ->(e) {}) } @start_callback = -> {} @exit_code = 0 @@ -43,6 +47,7 @@ class Runner::Connection return unless BACKEND_OUTPUT_SCHEMA.valid?(JSON.parse(event.data)) event = decode(event.data) + # There is one `handle_` method for every message type defined in the WebSocket schema. __send__("handle_#{event[:type]}", event) end @@ -50,7 +55,7 @@ class Runner::Connection @start_callback.call end - def on_error(event); end + def on_error(_event); end def on_close(_event) @exit_callback.call @exit_code @@ -70,11 +75,11 @@ class Runner::Connection @output_callback.call event[:data] end - def handle_error(event) end + def handle_error(_event); end - def handle_start(event) end + def handle_start(_event); end - def handle_timeout(event) - # TODO: set the runner state + def handle_timeout(_event) + raise Runner::Error::ExecutionTimeout.new('Execution exceeded its time limit') end end