Exclusively lock Runners during code executions

Previously, the same runner could be used multiple times with different submissions simultaneously. This, however, yielded errors, for example when one submission time oud (causing the running to be deleted) while another submission was still executed. Admin actions, such as the shell, can be still executed regardless of any other code execution. Fixes CODEOCEAN-HG Fixes openHPI/poseidon#423
2023-10-29 22:17:31 +01:00
parent 427b54d306
commit 8fc5123bae
16 changed files with 115 additions and 22 deletions
--- a/app/assets/javascripts/editor/editor.js.erb
+++ b/app/assets/javascripts/editor/editor.js.erb
@ -773,6 +773,8 @@ var CodeOceanEditor = {
            this.showContainerDepletedMessage();
        } else if (output.status === 'out_of_memory') {
            this.showOutOfMemoryMessage();
+        } else if (output.status === 'runner_in_use') {
+            this.showRunnerInUseMessage();
        }
    },

@ -832,6 +834,13 @@ var CodeOceanEditor = {
        });
    },

+    showRunnerInUseMessage: function () {
+        $.flash.warning({
+            icon: ['fa-solid', 'fa-triangle-exclamation'],
+            text: I18n.t('exercises.editor.runner_in_use')
+        });
+    },
+
    showTimeoutMessage: function () {
        $.flash.info({
            icon: ['fa-regular', 'fa-clock'],
--- a/app/assets/javascripts/editor/evaluation.js
+++ b/app/assets/javascripts/editor/evaluation.js
@ -109,6 +109,11 @@ CodeOceanEditorEvaluation = {
        })) {
            this.showOutOfMemoryMessage();
        }
+        if (_.some(response, function (result) {
+            return result.status === 'runner_in_use';
+        })) {
+            this.showRunnerInUseMessage();
+        }
        if (_.some(response, function (result) {
            return result.status === 'container_depleted';
        })) {
--- a/app/controllers/execution_environments_controller.rb
+++ b/app/controllers/execution_environments_controller.rb
@ -33,7 +33,7 @@ class ExecutionEnvironmentsController < ApplicationController
  def execute_command
    runner = Runner.for(current_user, @execution_environment)
    @privileged_execution = ActiveModel::Type::Boolean.new.cast(params[:sudo]) || @execution_environment.privileged_execution
-    output = runner.execute_command(params[:command], privileged_execution: @privileged_execution, raise_exception: false)
+    output = runner.execute_command(params[:command], privileged_execution: @privileged_execution, raise_exception: false, exclusive: false)
    render json: output.except(:messages)
  end

@ -41,11 +41,11 @@ class ExecutionEnvironmentsController < ApplicationController
    runner = Runner.for(current_user, @execution_environment)
    @privileged_execution = ActiveModel::Type::Boolean.new.cast(params[:sudo]) || @execution_environment.privileged_execution
    begin
-      files = runner.retrieve_files(path: params[:path], recursive: false, privileged_execution: @privileged_execution)
+      files = runner.retrieve_files(path: params[:path], recursive: false, privileged_execution: @privileged_execution, exclusive: false)
      downloadable_files, additional_directories = convert_files_json_to_files files
      js_tree = FileTree.new(downloadable_files, additional_directories, force_closed: true).to_js_tree
      render json: js_tree[:core][:data]
-    rescue Runner::Error::RunnerNotFound, Runner::Error::WorkspaceError
+    rescue Runner::Error::RunnerNotFound, Runner::Error::WorkspaceError, Runner::Error::RunnerInUse
      render json: []
    end
  end
--- a/app/controllers/live_streams_controller.rb
+++ b/app/controllers/live_streams_controller.rb
@ -26,15 +26,15 @@ class LiveStreamsController < ApplicationController
    runner = Runner.for(current_user, @execution_environment)
    fallback_location = shell_execution_environment_path(@execution_environment)
    privileged = params[:sudo] || @execution_environment.privileged_execution?
-    send_runner_file(runner, desired_file, fallback_location, privileged:)
+    send_runner_file(runner, desired_file, fallback_location, privileged:, exclusive: false)
  end

  private

-  def send_runner_file(runner, desired_file, redirect_fallback = root_path, privileged: false)
+  def send_runner_file(runner, desired_file, redirect_fallback = root_path, privileged: false, exclusive: true)
    filename = File.basename(desired_file)
    send_stream(filename:, type: 'application/octet-stream', disposition: 'attachment') do |stream|
-      runner.download_file(desired_file, privileged_execution: privileged) do |chunk, overall_size, _content_type|
+      runner.download_file(desired_file, privileged_execution: privileged, exclusive:) do |chunk, overall_size, _content_type|
        unless response.committed?
          # Disable Rack::ETag, which would otherwise cause the response to be cached
          # See https://github.com/rack/rack/issues/1619#issuecomment-848460528
--- a/app/controllers/remote_evaluation_controller.rb
+++ b/app/controllers/remote_evaluation_controller.rb
@ -72,6 +72,9 @@ class RemoteEvaluationController < ApplicationController
      # TODO: check token expired?
      {message: 'No exercise found for this validation_token! Please keep out!', status: 401}
    end
+  rescue Runner::Error::RunnerInUse => e
+    Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+    {message: I18n.t('exercises.editor.runner_in_use'), status: 409}
  rescue Runner::Error => e
    Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" }
    {message: I18n.t('exercises.editor.depleted'), status: 503}
--- a/app/controllers/request_for_comments_controller.rb
+++ b/app/controllers/request_for_comments_controller.rb
@ -167,11 +167,20 @@ class RequestForCommentsController < ApplicationController

    respond_to do |format|
      if @request_for_comment.save
+        begin
          # execute the tests here and wait until they finished.
          # As the same runner is used for the score and test run, no parallelization is possible
          # A run is triggered from the frontend and does not need to be handled here.
          @request_for_comment.submission.calculate_score(current_user)
+        rescue Runner::Error::RunnerInUse => e
+          Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+          format.json { render json: {error: t('exercises.editor.runner_in_use'), status: :runner_in_use}, status: :conflict }
+        rescue Runner::Error => e
+          Rails.logger.debug { "Runner error while requesting comments: #{e.message}" }
+          format.json { render json: {danger: t('exercises.editor.depleted'), status: :container_depleted}, status: :service_unavailable }
+        else
          format.json { render :show, status: :created, location: @request_for_comment }
+        end
      else
        format.html { render :new }
        format.json { render json: @request_for_comment.errors, status: :unprocessable_entity }
--- a/app/controllers/submissions_controller.rb
+++ b/app/controllers/submissions_controller.rb
@ -234,6 +234,12 @@ class SubmissionsController < ApplicationController
    @testrun[:exit_code] ||= 137
    @testrun[:output] = "out_of_memory: #{@testrun[:output]}"
    extract_durations(e)
+  rescue Runner::Error::RunnerInUse => e
+    send_and_store client_socket, {cmd: :status, status: :runner_in_use}
+    Rails.logger.debug { "Running a submission failed because the runner was already in use: #{e.message}" }
+    @testrun[:status] ||= :runner_in_use
+    @testrun[:output] = "runner_in_use: #{@testrun[:output]}"
+    extract_durations(e)
  rescue Runner::Error => e
    # Regardless of the specific error cause, we send a `container_depleted` status to the client.
    send_and_store client_socket, {cmd: :status, status: :container_depleted}
@ -266,6 +272,14 @@ class SubmissionsController < ApplicationController
    client_socket&.send_data(JSON.dump(@submission.calculate_score(current_user)))
    # To enable hints when scoring a submission, uncomment the next line:
    # send_hints(client_socket, StructuredError.where(submission: @submission))
+  rescue Runner::Error::RunnerInUse => e
+    extract_durations(e)
+    send_and_store client_socket, {cmd: :status, status: :runner_in_use}
+    Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+    @testrun[:passed] = false
+    @testrun[:status] ||= :runner_in_use
+    @testrun[:output] = "runner_in_use: #{@testrun[:output]}"
+    save_testrun_output 'assess'
  rescue Runner::Error => e
    extract_durations(e)
    send_and_store client_socket, {cmd: :status, status: :container_depleted}
@ -301,10 +315,17 @@ class SubmissionsController < ApplicationController

    # The score is stored separately, we can forward it to the client immediately
    client_socket&.send_data(JSON.dump(@submission.test(@file, current_user)))
+  rescue Runner::Error::RunnerInUse => e
+    extract_durations(e)
+    send_and_store client_socket, {cmd: :status, status: :runner_in_use}
+    Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+    @testrun[:passed] = false
+    @testrun[:status] ||= :runner_in_use
+    @testrun[:output] = "runner_in_use: #{@testrun[:output]}"
+    save_testrun_output 'assess'
  rescue Runner::Error => e
    extract_durations(e)
    send_and_store client_socket, {cmd: :status, status: :container_depleted}
-    kill_client_socket(client_socket)
    Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" }
    Sentry.capture_exception(e)
    @testrun[:passed] = false
--- a/app/errors/runner/error.rb
+++ b/app/errors/runner/error.rb
@ -16,6 +16,8 @@ class Runner

    class Unauthorized < Error; end

+    class RunnerInUse < Error; end
+
    class RunnerNotFound < Error; end

    class FaradayError < Error; end
--- a/app/models/execution_environment.rb
+++ b/app/models/execution_environment.rb
@ -92,7 +92,7 @@ class ExecutionEnvironment < ApplicationRecord
    retries = 0
    begin
      runner = Runner.for(author, self)
-      output = runner.execute_command(VALIDATION_COMMAND)
+      output = runner.execute_command(VALIDATION_COMMAND, exclusive: false)
      errors.add(:docker_image, "error: #{output[:stderr]}") if output[:stderr].present?
    rescue Runner::Error => e
      # In case of an Runner::Error, we retry multiple times before giving up.
--- a/app/models/runner.rb
+++ b/app/models/runner.rb
@ -44,18 +44,24 @@ class Runner < ApplicationRecord
  end

  def copy_files(files)
+    reserve!
    @strategy.copy_files(files)
  rescue Runner::Error::RunnerNotFound
    request_new_id
    save
    @strategy.copy_files(files)
+  ensure
+    release!
  end

-  def download_file(...)
-    @strategy.download_file(...)
+  def download_file(desired_file, privileged_execution:, exclusive: true)
+    reserve! if exclusive
+    @strategy.download_file(desired_file, privileged_execution:)
+    release! if exclusive
  end

-  def retrieve_files(raise_exception: true, **)
+  def retrieve_files(raise_exception: true, exclusive: true, **)
+    reserve! if exclusive
    try = 0
    begin
      if try.nonzero?
@ -77,12 +83,14 @@ class Runner < ApplicationRecord
      # We forward the exception if requested
      raise e if raise_exception && defined?(e) && e.present?

-      # Otherwise, we return an hash with empty files
+      # Otherwise, we return an hash with empty files and release the runner
+      release! if exclusive
      {'files' => []}
    end
  end

-  def attach_to_execution(command, privileged_execution: false, &block)
+  def attach_to_execution(command, privileged_execution: false, exclusive: true, &block)
+    reserve! if exclusive
    Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Starting execution with Runner #{id} for #{contributor_type} #{contributor_id}." }
    starting_time = Time.zone.now
    begin
@ -100,11 +108,12 @@ class Runner < ApplicationRecord
      e.execution_duration = Time.zone.now - starting_time
      raise
    end
+    release! if exclusive
    Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Stopped execution with Runner #{id} for #{contributor_type} #{contributor_id}." }
    Time.zone.now - starting_time # execution duration
  end

-  def execute_command(command, privileged_execution: false, raise_exception: true)
+  def execute_command(command, privileged_execution: false, raise_exception: true, exclusive: true)
    output = {
      stdout: +'',
      stderr: +'',
@ -119,7 +128,7 @@ class Runner < ApplicationRecord
        save
      end

-      execution_time = attach_to_execution(command, privileged_execution:) do |socket, starting_time|
+      execution_time = attach_to_execution(command, privileged_execution:, exclusive:) do |socket, starting_time|
        socket.on :stderr do |data|
          output[:stderr] << data
          output[:messages].push({cmd: :write, stream: :stderr, log: data, timestamp: Time.zone.now - starting_time})
@ -139,6 +148,9 @@ class Runner < ApplicationRecord
    rescue Runner::Error::OutOfMemory => e
      Rails.logger.debug { "Running command `#{command}` caused an out of memory error: #{e.message}" }
      output.merge!(status: :out_of_memory, container_execution_time: e.execution_duration)
+    rescue Runner::Error::RunnerInUse => e
+      Rails.logger.debug { "Running command `#{command}` failed because the runner was already in use: #{e.message}" }
+      output.merge!(status: :runner_in_use, container_execution_time: e.execution_duration)
    rescue Runner::Error::RunnerNotFound => e
      Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" }
      try += 1
@ -166,6 +178,27 @@ class Runner < ApplicationRecord

  def destroy_at_management
    @strategy.destroy_at_management
+    update!(runner_id: nil, reserved_until: nil)
+  end
+
+  def reserve!
+    with_lock do
+      if reserved_until.present? && reserved_until > Time.zone.now
+        @error = Runner::Error::RunnerInUse.new("The desired Runner #{id} is already in use until #{reserved_until.iso8601}.")
+        raise @error
+      else
+        update!(reserved_until: Time.zone.now + execution_environment.permitted_execution_time.seconds)
+        @error = nil
+      end
+    end
+  end
+
+  def release!
+    return if @error.present?
+
+    with_lock do
+      update!(reserved_until: nil)
+    end
  end

  private
--- a/app/models/testrun.rb
+++ b/app/models/testrun.rb
@ -14,6 +14,7 @@ class Testrun < ApplicationRecord
    timeout: 3,
    out_of_memory: 4,
    terminated_by_client: 5,
+    runner_in_use: 6,
  }, _default: :ok, _prefix: true

  validates :exit_code, numericality: {only_integer: true, min: 0, max: 255}, allow_nil: true
--- a/config/locales/de.yml
+++ b/config/locales/de.yml
@ -398,6 +398,7 @@ de:
      network: 'Während Ihr Code läuft, ist Port %{port} unter folgender Adresse erreichbar: <a href="%{address}" target="_blank" rel="noopener">%{address}</a>.'
      render: Anzeigen
      run: Ausführen
+      runner_in_use: Sie führen momentan Code aus. Bitte stoppen Sie die vorherige Ausführung oder warten Sie einen Moment, bevor Sie fortfahren.
      run_failure: Ihr Code konnte nicht auf der Plattform ausgeführt werden.
      run_success: Ihr Code wurde auf der Plattform ausgeführt.
      requestComments: Kommentare erbitten
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@ -398,6 +398,7 @@ en:
      network: 'While your code is running, port %{port} is accessible using the following address: <a href="%{address}" target="_blank" rel="noopener">%{address}</a>.'
      render: Render
      run: Run
+      runner_in_use: You are currently running code. Please stop the previous execution, or wait a moment before proceeding.
      run_failure: Your code could not be run.
      run_success: Your code was run on our servers.
      requestComments: Request Comments
--- a/db/migrate/20231029172331_add_reserved_until_to_runners.rb
+++ b/db/migrate/20231029172331_add_reserved_until_to_runners.rb
@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class AddReservedUntilToRunners < ActiveRecord::Migration[7.1]
+  def change
+    add_column :runners, :reserved_until, :datetime, null: true, default: nil
+  end
+end
--- a/db/schema.rb
+++ b/db/schema.rb
@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.

-ActiveRecord::Schema[7.1].define(version: 2023_10_13_194635) do
+ActiveRecord::Schema[7.1].define(version: 2023_10_29_172331) do
  # These are extensions that must be enabled in order to support this database
  enable_extension "pg_trgm"
  enable_extension "pgcrypto"
@ -483,6 +483,7 @@ ActiveRecord::Schema[7.1].define(version: 2023_10_13_194635) do
    t.bigint "contributor_id"
    t.datetime "created_at", null: false
    t.datetime "updated_at", null: false
+    t.datetime "reserved_until"
    t.index ["contributor_type", "contributor_id"], name: "index_runners_on_user"
    t.index ["execution_environment_id"], name: "index_runners_on_execution_environment_id"
  end
--- a/spec/models/execution_environment_spec.rb
+++ b/spec/models/execution_environment_spec.rb
@ -175,7 +175,7 @@ RSpec.describe ExecutionEnvironment do
    it 'executes the validation command' do
      allow(runner).to receive(:execute_command).and_return({})
      working_docker_image?
-      expect(runner).to have_received(:execute_command).with(ExecutionEnvironment::VALIDATION_COMMAND)
+      expect(runner).to have_received(:execute_command).with(ExecutionEnvironment::VALIDATION_COMMAND, exclusive: false)
    end

    context 'when the command produces an error' do