diff --git a/app/controllers/submissions_controller.rb b/app/controllers/submissions_controller.rb index 77cafbb8..93bdab1d 100644 --- a/app/controllers/submissions_controller.rb +++ b/app/controllers/submissions_controller.rb @@ -133,7 +133,7 @@ class SubmissionsController < ApplicationController end def handle_websockets(tubesock, runner, socket) - tubesock.send_data JSON.dump({'cmd' => 'status', 'status' => :container_running}) + tubesock.send_data JSON.dump({cmd: :status, status: :container_running}) @output = +'' socket.on :output do |data| @@ -182,21 +182,21 @@ class SubmissionsController < ApplicationController def run hijack do |tubesock| - if @embed_options[:disable_run] - kill_socket(tubesock) - else - begin - @container_execution_time = @submission.run(sanitize_filename) do |runner, socket| - @waiting_for_container_time = runner.waiting_time - handle_websockets(tubesock, runner, socket) - end - save_run_output - rescue RunnerNotAvailableError - tubesock.send_data JSON.dump({cmd: :timeout}) - kill_socket(tubesock) - Rails.logger.debug('Runner not available') - end + return kill_socket(tubesock) if @embed_options[:disable_run] + + @container_execution_time = @submission.run(sanitize_filename) do |runner, socket| + @waiting_for_container_time = runner.waiting_time + handle_websockets(tubesock, runner, socket) end + save_run_output + rescue Runner::Error::ExecutionTimeout => e + tubesock.send_data JSON.dump({cmd: :status, status: :timeout}) + kill_socket(tubesock) + Rails.logger.debug { "Running a submission failed: #{e.message}" } + rescue Runner::Error => e + tubesock.send_data JSON.dump({cmd: :status, status: :container_depleted}) + kill_socket(tubesock) + Rails.logger.debug { "Runner error while running a submission: #{e.message}" } end end @@ -244,8 +244,15 @@ class SubmissionsController < ApplicationController tubesock.send_data(@submission.calculate_score) # To enable hints when scoring a submission, uncomment the next line: # send_hints(tubesock, StructuredError.where(submission: @submission)) - - tubesock.send_data JSON.dump({'cmd' => 'exit'}) + rescue Runner::Error::ExecutionTimeout => e + tubesock.send_data JSON.dump({cmd: :status, status: :timeout}) + Rails.logger.debug { "Running a submission failed: #{e.message}" } + rescue Runner::Error => e + tubesock.send_data JSON.dump({cmd: :status, status: :container_depleted}) + Rails.logger.debug { "Runner error while scoring a submission: #{e.message}" } + ensure + tubesock.send_data JSON.dump({cmd: :exit}) + tubesock.close end ensure ActiveRecord::Base.connection_pool.release_connection diff --git a/app/errors/runner/error.rb b/app/errors/runner/error.rb new file mode 100644 index 00000000..3470f553 --- /dev/null +++ b/app/errors/runner/error.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error < ApplicationError; end diff --git a/app/errors/runner/error/bad_request.rb b/app/errors/runner/error/bad_request.rb new file mode 100644 index 00000000..98b8f5ca --- /dev/null +++ b/app/errors/runner/error/bad_request.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::BadRequest < Runner::Error; end diff --git a/app/errors/runner/error/execution_timeout.rb b/app/errors/runner/error/execution_timeout.rb new file mode 100644 index 00000000..c10806a5 --- /dev/null +++ b/app/errors/runner/error/execution_timeout.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::ExecutionTimeout < Runner::Error; end diff --git a/app/errors/runner/error/internal_server_error.rb b/app/errors/runner/error/internal_server_error.rb new file mode 100644 index 00000000..68f3f0b8 --- /dev/null +++ b/app/errors/runner/error/internal_server_error.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::InternalServerError < Runner::Error; end diff --git a/app/errors/runner/error/not_available.rb b/app/errors/runner/error/not_available.rb new file mode 100644 index 00000000..4785ea59 --- /dev/null +++ b/app/errors/runner/error/not_available.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::NotAvailable < Runner::Error; end diff --git a/app/errors/runner/error/not_found.rb b/app/errors/runner/error/not_found.rb new file mode 100644 index 00000000..329bcc71 --- /dev/null +++ b/app/errors/runner/error/not_found.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::NotFound < Runner::Error; end diff --git a/app/errors/runner/error/unauthorized.rb b/app/errors/runner/error/unauthorized.rb new file mode 100644 index 00000000..8bccf7fa --- /dev/null +++ b/app/errors/runner/error/unauthorized.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::Unauthorized < Runner::Error; end diff --git a/app/errors/runner/error/unknown.rb b/app/errors/runner/error/unknown.rb new file mode 100644 index 00000000..6e1a1fce --- /dev/null +++ b/app/errors/runner/error/unknown.rb @@ -0,0 +1,3 @@ +# frozen_string_literal: true + +class Runner::Error::Unknown < Runner::Error; end diff --git a/app/errors/runner_not_available_error.rb b/app/errors/runner_not_available_error.rb deleted file mode 100644 index 4390fad4..00000000 --- a/app/errors/runner_not_available_error.rb +++ /dev/null @@ -1,4 +0,0 @@ -# frozen_string_literal: true - -class RunnerNotAvailableError < ApplicationError -end diff --git a/app/models/runner.rb b/app/models/runner.rb index eec2a1e6..f8861a2a 100644 --- a/app/models/runner.rb +++ b/app/models/runner.rb @@ -4,51 +4,60 @@ class Runner < ApplicationRecord BASE_URL = CodeOcean::Config.new(:code_ocean).read[:runner_management][:url] HEADERS = {'Content-Type' => 'application/json'}.freeze UNUSED_EXPIRATION_TIME = CodeOcean::Config.new(:code_ocean).read[:runner_management][:unused_runner_expiration_time].seconds + ERRORS = %w[NOMAD_UNREACHABLE NOMAD_OVERLOAD NOMAD_INTERNAL_SERVER_ERROR UNKNOWN].freeze + + ERRORS.each do |error| + define_singleton_method :"error_#{error.downcase}" do + error + end + end belongs_to :execution_environment belongs_to :user, polymorphic: true - before_create :new_runner - before_destroy :destroy_runner + before_validation :request_remotely - validates :execution_environment, presence: true - validates :user, presence: true + validates :execution_environment, :user, :runner_id, presence: true def self.for(user, exercise) execution_environment = ExecutionEnvironment.find(exercise.execution_environment_id) runner = find_or_create_by(user: user, execution_environment: execution_environment) - return runner if runner.save + unless runner.persisted? + # runner was not saved in the database (was not valid) + raise Runner::Error::InternalServerError.new("Provided runner could not be saved: #{runner.errors.inspect}") + end - raise RunnerNotAvailableError.new('No runner available') + runner end def copy_files(files) url = "#{runner_url}/files" body = {copy: files.map {|filename, content| {path: filename, content: Base64.strict_encode64(content)} }} response = Faraday.patch(url, body.to_json, HEADERS) - return unless response.status == 404 - - # runner has disappeared for some reason - destroy - raise RunnerNotAvailableError.new('Runner unavailable') + handle_error response unless response.status == 204 end def execute_command(command) url = "#{runner_url}/execute" body = {command: command, timeLimit: execution_environment.permitted_execution_time} response = Faraday.post(url, body.to_json, HEADERS) - if response.status == 404 - # runner has disappeared for some reason - destroy - raise RunnerNotAvailableError.new('Runner unavailable') + if response.status == 200 + response_body = parse response + websocket_url = response_body[:websocketUrl] + if websocket_url.present? + return websocket_url + else + raise Runner::Error::Unknown.new('Runner management sent unexpected response') + end end - parse response + + handle_error response end def execute_interactively(command) starting_time = Time.zone.now - websocket_url = execute_command(command)[:websocketUrl] + websocket_url = execute_command(command) EventMachine.run do socket = Runner::Connection.new(websocket_url) yield(self, socket) if block_given? @@ -56,24 +65,64 @@ class Runner < ApplicationRecord Time.zone.now - starting_time # execution time end - def destroy_runner - Faraday.delete runner_url - end + # This method is currently not used. + # This does *not* destroy the ActiveRecord model. + def destroy_remotely + response = Faraday.delete runner_url + return if response.status == 204 - def status - # TODO: return actual state retrieved via websocket - :timeouted + if response.status == 404 + raise Runner::Error::NotFound.new('Runner not found') + else + handle_error response + end end private - def new_runner + def request_remotely + return if runner_id.present? + url = "#{BASE_URL}/runners" body = {executionEnvironmentId: execution_environment.id, inactivityTimeout: UNUSED_EXPIRATION_TIME} response = Faraday.post(url, body.to_json, HEADERS) - response_body = parse response - self.runner_id = response_body[:runnerId] - throw :abort unless response.status == 200 + + case response.status + when 200 + response_body = parse response + runner_id = response_body[:runnerId] + throw(:abort) if runner_id.blank? + self.runner_id = response_body[:runnerId] + when 404 + raise Runner::Error::NotFound.new('Execution environment not found') + else + handle_error response + end + end + + def handle_error(response) + case response.status + when 400 + response_body = parse response + raise Runner::Error::BadRequest.new(response_body[:message]) + when 401 + raise Runner::Error::Unauthorized.new('Authentication with runner management failed') + when 404 + # The runner does not exist in the runner management (e.g. due to an inactivity timeout). + # Delete the runner model in this case as it can not be used anymore. + destroy + raise Runner::Error::NotFound.new('Runner not found') + when 500 + response_body = parse response + error_code = response_body[:errorCode] + if error_code == Runner.error_nomad_overload + raise Runner::Error::NotAvailable.new("No runner available (#{error_code}): #{response_body[:message]}") + else + raise Runner::Error::InternalServerError.new("#{response_body[:errorCode]}: #{response_body[:message]}") + end + else + raise Runner::Error::Unknown.new('Runner management sent unexpected response') + end end def runner_url @@ -82,5 +131,8 @@ class Runner < ApplicationRecord def parse(response) JSON.parse(response.body).deep_symbolize_keys + rescue JSON::ParserError => e + # the runner management should not send invalid json + raise Runner::Error::Unknown.new("Error parsing response from runner management: #{e.message}") end end diff --git a/lib/runner/connection.rb b/lib/runner/connection.rb index ea3840a6..55378e04 100644 --- a/lib/runner/connection.rb +++ b/lib/runner/connection.rb @@ -4,16 +4,20 @@ require 'faye/websocket/client' require 'json_schemer' class Runner::Connection + # These are events for which callbacks can be registered. EVENTS = %i[start output exit stdout stderr].freeze BACKEND_OUTPUT_SCHEMA = JSONSchemer.schema(JSON.parse(File.read('lib/runner/backend-output.schema.json'))) def initialize(url) @socket = Faye::WebSocket::Client.new(url, [], ping: 5) + # For every event type of faye websockets, the corresponding + # RunnerConnection method starting with `on_` is called. %i[open message error close].each do |event_type| @socket.on(event_type) {|event| __send__(:"on_#{event_type}", event) } end + # This registers empty default callbacks. EVENTS.each {|event_type| instance_variable_set(:"@#{event_type}_callback", ->(e) {}) } @start_callback = -> {} @exit_code = 0 @@ -43,6 +47,7 @@ class Runner::Connection return unless BACKEND_OUTPUT_SCHEMA.valid?(JSON.parse(event.data)) event = decode(event.data) + # There is one `handle_` method for every message type defined in the WebSocket schema. __send__("handle_#{event[:type]}", event) end @@ -50,7 +55,7 @@ class Runner::Connection @start_callback.call end - def on_error(event); end + def on_error(_event); end def on_close(_event) @exit_callback.call @exit_code @@ -70,11 +75,11 @@ class Runner::Connection @output_callback.call event[:data] end - def handle_error(event) end + def handle_error(_event); end - def handle_start(event) end + def handle_start(_event); end - def handle_timeout(event) - # TODO: set the runner state + def handle_timeout(_event) + raise Runner::Error::ExecutionTimeout.new('Execution exceeded its time limit') end end