From 8fc5123bae62f2c2901e9230cd693ef77bc1222b Mon Sep 17 00:00:00 2001
From: Sebastian Serth <Sebastian.Serth@hpi.de>
Date: Sun, 29 Oct 2023 22:17:31 +0100
Subject: [PATCH] Exclusively lock Runners during code executions

Previously, the same runner could be used multiple times with different submissions simultaneously. This, however, yielded errors, for example when one submission time oud (causing the running to be deleted) while another submission was still executed.

Admin actions, such as the shell, can be still executed regardless of any other code execution.

Fixes CODEOCEAN-HG
Fixes openHPI/poseidon#423
---
 app/assets/javascripts/editor/editor.js.erb   |  9 ++++
 app/assets/javascripts/editor/evaluation.js   |  5 ++
 .../execution_environments_controller.rb      |  6 +--
 app/controllers/live_streams_controller.rb    |  6 +--
 .../remote_evaluation_controller.rb           |  3 ++
 .../request_for_comments_controller.rb        | 19 ++++++--
 app/controllers/submissions_controller.rb     | 23 ++++++++-
 app/errors/runner/error.rb                    |  2 +
 app/models/execution_environment.rb           |  2 +-
 app/models/runner.rb                          | 47 ++++++++++++++++---
 app/models/testrun.rb                         |  1 +
 config/locales/de.yml                         |  1 +
 config/locales/en.yml                         |  1 +
 ...029172331_add_reserved_until_to_runners.rb |  7 +++
 db/schema.rb                                  |  3 +-
 spec/models/execution_environment_spec.rb     |  2 +-
 16 files changed, 115 insertions(+), 22 deletions(-)
 create mode 100644 db/migrate/20231029172331_add_reserved_until_to_runners.rb

diff --git a/app/assets/javascripts/editor/editor.js.erb b/app/assets/javascripts/editor/editor.js.erb
index f05914e6..970a4696 100644
--- a/app/assets/javascripts/editor/editor.js.erb
+++ b/app/assets/javascripts/editor/editor.js.erb
@@ -773,6 +773,8 @@ var CodeOceanEditor = {
             this.showContainerDepletedMessage();
         } else if (output.status === 'out_of_memory') {
             this.showOutOfMemoryMessage();
+        } else if (output.status === 'runner_in_use') {
+            this.showRunnerInUseMessage();
         }
     },
 
@@ -832,6 +834,13 @@ var CodeOceanEditor = {
         });
     },
 
+    showRunnerInUseMessage: function () {
+        $.flash.warning({
+            icon: ['fa-solid', 'fa-triangle-exclamation'],
+            text: I18n.t('exercises.editor.runner_in_use')
+        });
+    },
+
     showTimeoutMessage: function () {
         $.flash.info({
             icon: ['fa-regular', 'fa-clock'],
diff --git a/app/assets/javascripts/editor/evaluation.js b/app/assets/javascripts/editor/evaluation.js
index 317a3a86..c493f98d 100644
--- a/app/assets/javascripts/editor/evaluation.js
+++ b/app/assets/javascripts/editor/evaluation.js
@@ -109,6 +109,11 @@ CodeOceanEditorEvaluation = {
         })) {
             this.showOutOfMemoryMessage();
         }
+        if (_.some(response, function (result) {
+            return result.status === 'runner_in_use';
+        })) {
+            this.showRunnerInUseMessage();
+        }
         if (_.some(response, function (result) {
             return result.status === 'container_depleted';
         })) {
diff --git a/app/controllers/execution_environments_controller.rb b/app/controllers/execution_environments_controller.rb
index 39c09c43..640e47de 100644
--- a/app/controllers/execution_environments_controller.rb
+++ b/app/controllers/execution_environments_controller.rb
@@ -33,7 +33,7 @@ class ExecutionEnvironmentsController < ApplicationController
   def execute_command
     runner = Runner.for(current_user, @execution_environment)
     @privileged_execution = ActiveModel::Type::Boolean.new.cast(params[:sudo]) || @execution_environment.privileged_execution
-    output = runner.execute_command(params[:command], privileged_execution: @privileged_execution, raise_exception: false)
+    output = runner.execute_command(params[:command], privileged_execution: @privileged_execution, raise_exception: false, exclusive: false)
     render json: output.except(:messages)
   end
 
@@ -41,11 +41,11 @@ class ExecutionEnvironmentsController < ApplicationController
     runner = Runner.for(current_user, @execution_environment)
     @privileged_execution = ActiveModel::Type::Boolean.new.cast(params[:sudo]) || @execution_environment.privileged_execution
     begin
-      files = runner.retrieve_files(path: params[:path], recursive: false, privileged_execution: @privileged_execution)
+      files = runner.retrieve_files(path: params[:path], recursive: false, privileged_execution: @privileged_execution, exclusive: false)
       downloadable_files, additional_directories = convert_files_json_to_files files
       js_tree = FileTree.new(downloadable_files, additional_directories, force_closed: true).to_js_tree
       render json: js_tree[:core][:data]
-    rescue Runner::Error::RunnerNotFound, Runner::Error::WorkspaceError
+    rescue Runner::Error::RunnerNotFound, Runner::Error::WorkspaceError, Runner::Error::RunnerInUse
       render json: []
     end
   end
diff --git a/app/controllers/live_streams_controller.rb b/app/controllers/live_streams_controller.rb
index 47f5b313..c51a6075 100644
--- a/app/controllers/live_streams_controller.rb
+++ b/app/controllers/live_streams_controller.rb
@@ -26,15 +26,15 @@ class LiveStreamsController < ApplicationController
     runner = Runner.for(current_user, @execution_environment)
     fallback_location = shell_execution_environment_path(@execution_environment)
     privileged = params[:sudo] || @execution_environment.privileged_execution?
-    send_runner_file(runner, desired_file, fallback_location, privileged:)
+    send_runner_file(runner, desired_file, fallback_location, privileged:, exclusive: false)
   end
 
   private
 
-  def send_runner_file(runner, desired_file, redirect_fallback = root_path, privileged: false)
+  def send_runner_file(runner, desired_file, redirect_fallback = root_path, privileged: false, exclusive: true)
     filename = File.basename(desired_file)
     send_stream(filename:, type: 'application/octet-stream', disposition: 'attachment') do |stream|
-      runner.download_file(desired_file, privileged_execution: privileged) do |chunk, overall_size, _content_type|
+      runner.download_file(desired_file, privileged_execution: privileged, exclusive:) do |chunk, overall_size, _content_type|
         unless response.committed?
           # Disable Rack::ETag, which would otherwise cause the response to be cached
           # See https://github.com/rack/rack/issues/1619#issuecomment-848460528
diff --git a/app/controllers/remote_evaluation_controller.rb b/app/controllers/remote_evaluation_controller.rb
index 1c4b4b6b..caa63f66 100644
--- a/app/controllers/remote_evaluation_controller.rb
+++ b/app/controllers/remote_evaluation_controller.rb
@@ -72,6 +72,9 @@ class RemoteEvaluationController < ApplicationController
       # TODO: check token expired?
       {message: 'No exercise found for this validation_token! Please keep out!', status: 401}
     end
+  rescue Runner::Error::RunnerInUse => e
+    Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+    {message: I18n.t('exercises.editor.runner_in_use'), status: 409}
   rescue Runner::Error => e
     Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" }
     {message: I18n.t('exercises.editor.depleted'), status: 503}
diff --git a/app/controllers/request_for_comments_controller.rb b/app/controllers/request_for_comments_controller.rb
index 634fa048..b92de462 100644
--- a/app/controllers/request_for_comments_controller.rb
+++ b/app/controllers/request_for_comments_controller.rb
@@ -167,11 +167,20 @@ class RequestForCommentsController < ApplicationController
 
     respond_to do |format|
       if @request_for_comment.save
-        # execute the tests here and wait until they finished.
-        # As the same runner is used for the score and test run, no parallelization is possible
-        # A run is triggered from the frontend and does not need to be handled here.
-        @request_for_comment.submission.calculate_score(current_user)
-        format.json { render :show, status: :created, location: @request_for_comment }
+        begin
+          # execute the tests here and wait until they finished.
+          # As the same runner is used for the score and test run, no parallelization is possible
+          # A run is triggered from the frontend and does not need to be handled here.
+          @request_for_comment.submission.calculate_score(current_user)
+        rescue Runner::Error::RunnerInUse => e
+          Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+          format.json { render json: {error: t('exercises.editor.runner_in_use'), status: :runner_in_use}, status: :conflict }
+        rescue Runner::Error => e
+          Rails.logger.debug { "Runner error while requesting comments: #{e.message}" }
+          format.json { render json: {danger: t('exercises.editor.depleted'), status: :container_depleted}, status: :service_unavailable }
+        else
+          format.json { render :show, status: :created, location: @request_for_comment }
+        end
       else
         format.html { render :new }
         format.json { render json: @request_for_comment.errors, status: :unprocessable_entity }
diff --git a/app/controllers/submissions_controller.rb b/app/controllers/submissions_controller.rb
index b60eb36a..9283063b 100644
--- a/app/controllers/submissions_controller.rb
+++ b/app/controllers/submissions_controller.rb
@@ -234,6 +234,12 @@ class SubmissionsController < ApplicationController
     @testrun[:exit_code] ||= 137
     @testrun[:output] = "out_of_memory: #{@testrun[:output]}"
     extract_durations(e)
+  rescue Runner::Error::RunnerInUse => e
+    send_and_store client_socket, {cmd: :status, status: :runner_in_use}
+    Rails.logger.debug { "Running a submission failed because the runner was already in use: #{e.message}" }
+    @testrun[:status] ||= :runner_in_use
+    @testrun[:output] = "runner_in_use: #{@testrun[:output]}"
+    extract_durations(e)
   rescue Runner::Error => e
     # Regardless of the specific error cause, we send a `container_depleted` status to the client.
     send_and_store client_socket, {cmd: :status, status: :container_depleted}
@@ -266,6 +272,14 @@ class SubmissionsController < ApplicationController
     client_socket&.send_data(JSON.dump(@submission.calculate_score(current_user)))
     # To enable hints when scoring a submission, uncomment the next line:
     # send_hints(client_socket, StructuredError.where(submission: @submission))
+  rescue Runner::Error::RunnerInUse => e
+    extract_durations(e)
+    send_and_store client_socket, {cmd: :status, status: :runner_in_use}
+    Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+    @testrun[:passed] = false
+    @testrun[:status] ||= :runner_in_use
+    @testrun[:output] = "runner_in_use: #{@testrun[:output]}"
+    save_testrun_output 'assess'
   rescue Runner::Error => e
     extract_durations(e)
     send_and_store client_socket, {cmd: :status, status: :container_depleted}
@@ -301,10 +315,17 @@ class SubmissionsController < ApplicationController
 
     # The score is stored separately, we can forward it to the client immediately
     client_socket&.send_data(JSON.dump(@submission.test(@file, current_user)))
+  rescue Runner::Error::RunnerInUse => e
+    extract_durations(e)
+    send_and_store client_socket, {cmd: :status, status: :runner_in_use}
+    Rails.logger.debug { "Scoring a submission failed because the runner was already in use: #{e.message}" }
+    @testrun[:passed] = false
+    @testrun[:status] ||= :runner_in_use
+    @testrun[:output] = "runner_in_use: #{@testrun[:output]}"
+    save_testrun_output 'assess'
   rescue Runner::Error => e
     extract_durations(e)
     send_and_store client_socket, {cmd: :status, status: :container_depleted}
-    kill_client_socket(client_socket)
     Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" }
     Sentry.capture_exception(e)
     @testrun[:passed] = false
diff --git a/app/errors/runner/error.rb b/app/errors/runner/error.rb
index cc88de17..0084902c 100644
--- a/app/errors/runner/error.rb
+++ b/app/errors/runner/error.rb
@@ -16,6 +16,8 @@ class Runner
 
     class Unauthorized < Error; end
 
+    class RunnerInUse < Error; end
+
     class RunnerNotFound < Error; end
 
     class FaradayError < Error; end
diff --git a/app/models/execution_environment.rb b/app/models/execution_environment.rb
index 7e5fa9ac..9bb3eab7 100644
--- a/app/models/execution_environment.rb
+++ b/app/models/execution_environment.rb
@@ -92,7 +92,7 @@ class ExecutionEnvironment < ApplicationRecord
     retries = 0
     begin
       runner = Runner.for(author, self)
-      output = runner.execute_command(VALIDATION_COMMAND)
+      output = runner.execute_command(VALIDATION_COMMAND, exclusive: false)
       errors.add(:docker_image, "error: #{output[:stderr]}") if output[:stderr].present?
     rescue Runner::Error => e
       # In case of an Runner::Error, we retry multiple times before giving up.
diff --git a/app/models/runner.rb b/app/models/runner.rb
index 293a6c18..dfcd16d1 100644
--- a/app/models/runner.rb
+++ b/app/models/runner.rb
@@ -44,18 +44,24 @@ class Runner < ApplicationRecord
   end
 
   def copy_files(files)
+    reserve!
     @strategy.copy_files(files)
   rescue Runner::Error::RunnerNotFound
     request_new_id
     save
     @strategy.copy_files(files)
+  ensure
+    release!
   end
 
-  def download_file(...)
-    @strategy.download_file(...)
+  def download_file(desired_file, privileged_execution:, exclusive: true)
+    reserve! if exclusive
+    @strategy.download_file(desired_file, privileged_execution:)
+    release! if exclusive
   end
 
-  def retrieve_files(raise_exception: true, **)
+  def retrieve_files(raise_exception: true, exclusive: true, **)
+    reserve! if exclusive
     try = 0
     begin
       if try.nonzero?
@@ -77,12 +83,14 @@ class Runner < ApplicationRecord
       # We forward the exception if requested
       raise e if raise_exception && defined?(e) && e.present?
 
-      # Otherwise, we return an hash with empty files
+      # Otherwise, we return an hash with empty files and release the runner
+      release! if exclusive
       {'files' => []}
     end
   end
 
-  def attach_to_execution(command, privileged_execution: false, &block)
+  def attach_to_execution(command, privileged_execution: false, exclusive: true, &block)
+    reserve! if exclusive
     Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Starting execution with Runner #{id} for #{contributor_type} #{contributor_id}." }
     starting_time = Time.zone.now
     begin
@@ -100,11 +108,12 @@ class Runner < ApplicationRecord
       e.execution_duration = Time.zone.now - starting_time
       raise
     end
+    release! if exclusive
     Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Stopped execution with Runner #{id} for #{contributor_type} #{contributor_id}." }
     Time.zone.now - starting_time # execution duration
   end
 
-  def execute_command(command, privileged_execution: false, raise_exception: true)
+  def execute_command(command, privileged_execution: false, raise_exception: true, exclusive: true)
     output = {
       stdout: +'',
       stderr: +'',
@@ -119,7 +128,7 @@ class Runner < ApplicationRecord
         save
       end
 
-      execution_time = attach_to_execution(command, privileged_execution:) do |socket, starting_time|
+      execution_time = attach_to_execution(command, privileged_execution:, exclusive:) do |socket, starting_time|
         socket.on :stderr do |data|
           output[:stderr] << data
           output[:messages].push({cmd: :write, stream: :stderr, log: data, timestamp: Time.zone.now - starting_time})
@@ -139,6 +148,9 @@ class Runner < ApplicationRecord
     rescue Runner::Error::OutOfMemory => e
       Rails.logger.debug { "Running command `#{command}` caused an out of memory error: #{e.message}" }
       output.merge!(status: :out_of_memory, container_execution_time: e.execution_duration)
+    rescue Runner::Error::RunnerInUse => e
+      Rails.logger.debug { "Running command `#{command}` failed because the runner was already in use: #{e.message}" }
+      output.merge!(status: :runner_in_use, container_execution_time: e.execution_duration)
     rescue Runner::Error::RunnerNotFound => e
       Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" }
       try += 1
@@ -166,6 +178,27 @@ class Runner < ApplicationRecord
 
   def destroy_at_management
     @strategy.destroy_at_management
+    update!(runner_id: nil, reserved_until: nil)
+  end
+
+  def reserve!
+    with_lock do
+      if reserved_until.present? && reserved_until > Time.zone.now
+        @error = Runner::Error::RunnerInUse.new("The desired Runner #{id} is already in use until #{reserved_until.iso8601}.")
+        raise @error
+      else
+        update!(reserved_until: Time.zone.now + execution_environment.permitted_execution_time.seconds)
+        @error = nil
+      end
+    end
+  end
+
+  def release!
+    return if @error.present?
+
+    with_lock do
+      update!(reserved_until: nil)
+    end
   end
 
   private
diff --git a/app/models/testrun.rb b/app/models/testrun.rb
index ed680461..9131790c 100644
--- a/app/models/testrun.rb
+++ b/app/models/testrun.rb
@@ -14,6 +14,7 @@ class Testrun < ApplicationRecord
     timeout: 3,
     out_of_memory: 4,
     terminated_by_client: 5,
+    runner_in_use: 6,
   }, _default: :ok, _prefix: true
 
   validates :exit_code, numericality: {only_integer: true, min: 0, max: 255}, allow_nil: true
diff --git a/config/locales/de.yml b/config/locales/de.yml
index dcc63768..8c2a08d9 100644
--- a/config/locales/de.yml
+++ b/config/locales/de.yml
@@ -398,6 +398,7 @@ de:
       network: 'Während Ihr Code läuft, ist Port %{port} unter folgender Adresse erreichbar: <a href="%{address}" target="_blank" rel="noopener">%{address}</a>.'
       render: Anzeigen
       run: Ausführen
+      runner_in_use: Sie führen momentan Code aus. Bitte stoppen Sie die vorherige Ausführung oder warten Sie einen Moment, bevor Sie fortfahren.
       run_failure: Ihr Code konnte nicht auf der Plattform ausgeführt werden.
       run_success: Ihr Code wurde auf der Plattform ausgeführt.
       requestComments: Kommentare erbitten
diff --git a/config/locales/en.yml b/config/locales/en.yml
index a42d0ae7..6ccb59c5 100644
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@@ -398,6 +398,7 @@ en:
       network: 'While your code is running, port %{port} is accessible using the following address: <a href="%{address}" target="_blank" rel="noopener">%{address}</a>.'
       render: Render
       run: Run
+      runner_in_use: You are currently running code. Please stop the previous execution, or wait a moment before proceeding.
       run_failure: Your code could not be run.
       run_success: Your code was run on our servers.
       requestComments: Request Comments
diff --git a/db/migrate/20231029172331_add_reserved_until_to_runners.rb b/db/migrate/20231029172331_add_reserved_until_to_runners.rb
new file mode 100644
index 00000000..b4045eb6
--- /dev/null
+++ b/db/migrate/20231029172331_add_reserved_until_to_runners.rb
@@ -0,0 +1,7 @@
+# frozen_string_literal: true
+
+class AddReservedUntilToRunners < ActiveRecord::Migration[7.1]
+  def change
+    add_column :runners, :reserved_until, :datetime, null: true, default: nil
+  end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 29b6abbb..560eb378 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.1].define(version: 2023_10_13_194635) do
+ActiveRecord::Schema[7.1].define(version: 2023_10_29_172331) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pg_trgm"
   enable_extension "pgcrypto"
@@ -483,6 +483,7 @@ ActiveRecord::Schema[7.1].define(version: 2023_10_13_194635) do
     t.bigint "contributor_id"
     t.datetime "created_at", null: false
     t.datetime "updated_at", null: false
+    t.datetime "reserved_until"
     t.index ["contributor_type", "contributor_id"], name: "index_runners_on_user"
     t.index ["execution_environment_id"], name: "index_runners_on_execution_environment_id"
   end
diff --git a/spec/models/execution_environment_spec.rb b/spec/models/execution_environment_spec.rb
index bbaeddab..fc03843e 100644
--- a/spec/models/execution_environment_spec.rb
+++ b/spec/models/execution_environment_spec.rb
@@ -175,7 +175,7 @@ RSpec.describe ExecutionEnvironment do
     it 'executes the validation command' do
       allow(runner).to receive(:execute_command).and_return({})
       working_docker_image?
-      expect(runner).to have_received(:execute_command).with(ExecutionEnvironment::VALIDATION_COMMAND)
+      expect(runner).to have_received(:execute_command).with(ExecutionEnvironment::VALIDATION_COMMAND, exclusive: false)
     end
 
     context 'when the command produces an error' do