Exclusively lock Runners during code executions

Previously, the same runner could be used multiple times with different submissions simultaneously. This, however, yielded errors, for example when one submission time oud (causing the running to be deleted) while another submission was still executed. Admin actions, such as the shell, can be still executed regardless of any other code execution. Fixes CODEOCEAN-HG Fixes openHPI/poseidon#423
2023-10-29 22:17:31 +01:00
parent 427b54d306
commit 8fc5123bae
16 changed files with 115 additions and 22 deletions
--- a/app/models/execution_environment.rb
+++ b/app/models/execution_environment.rb
@@ -92,7 +92,7 @@ class ExecutionEnvironment < ApplicationRecord
    retries = 0
    begin
      runner = Runner.for(author, self)
-      output = runner.execute_command(VALIDATION_COMMAND)
+      output = runner.execute_command(VALIDATION_COMMAND, exclusive: false)
      errors.add(:docker_image, "error: #{output[:stderr]}") if output[:stderr].present?
    rescue Runner::Error => e
      # In case of an Runner::Error, we retry multiple times before giving up.
--- a/app/models/runner.rb
+++ b/app/models/runner.rb
@@ -44,18 +44,24 @@ class Runner < ApplicationRecord
  end

  def copy_files(files)
+    reserve!
    @strategy.copy_files(files)
  rescue Runner::Error::RunnerNotFound
    request_new_id
    save
    @strategy.copy_files(files)
+  ensure
+    release!
  end

-  def download_file(...)
-    @strategy.download_file(...)
+  def download_file(desired_file, privileged_execution:, exclusive: true)
+    reserve! if exclusive
+    @strategy.download_file(desired_file, privileged_execution:)
+    release! if exclusive
  end

-  def retrieve_files(raise_exception: true, **)
+  def retrieve_files(raise_exception: true, exclusive: true, **)
+    reserve! if exclusive
    try = 0
    begin
      if try.nonzero?
@@ -77,12 +83,14 @@ class Runner < ApplicationRecord
      # We forward the exception if requested
      raise e if raise_exception && defined?(e) && e.present?

-      # Otherwise, we return an hash with empty files
+      # Otherwise, we return an hash with empty files and release the runner
+      release! if exclusive
      {'files' => []}
    end
  end

-  def attach_to_execution(command, privileged_execution: false, &block)
+  def attach_to_execution(command, privileged_execution: false, exclusive: true, &block)
+    reserve! if exclusive
    Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Starting execution with Runner #{id} for #{contributor_type} #{contributor_id}." }
    starting_time = Time.zone.now
    begin
@@ -100,11 +108,12 @@ class Runner < ApplicationRecord
      e.execution_duration = Time.zone.now - starting_time
      raise
    end
+    release! if exclusive
    Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Stopped execution with Runner #{id} for #{contributor_type} #{contributor_id}." }
    Time.zone.now - starting_time # execution duration
  end

-  def execute_command(command, privileged_execution: false, raise_exception: true)
+  def execute_command(command, privileged_execution: false, raise_exception: true, exclusive: true)
    output = {
      stdout: +'',
      stderr: +'',
@@ -119,7 +128,7 @@ class Runner < ApplicationRecord
        save
      end

-      execution_time = attach_to_execution(command, privileged_execution:) do |socket, starting_time|
+      execution_time = attach_to_execution(command, privileged_execution:, exclusive:) do |socket, starting_time|
        socket.on :stderr do |data|
          output[:stderr] << data
          output[:messages].push({cmd: :write, stream: :stderr, log: data, timestamp: Time.zone.now - starting_time})
@@ -139,6 +148,9 @@ class Runner < ApplicationRecord
    rescue Runner::Error::OutOfMemory => e
      Rails.logger.debug { "Running command `#{command}` caused an out of memory error: #{e.message}" }
      output.merge!(status: :out_of_memory, container_execution_time: e.execution_duration)
+    rescue Runner::Error::RunnerInUse => e
+      Rails.logger.debug { "Running command `#{command}` failed because the runner was already in use: #{e.message}" }
+      output.merge!(status: :runner_in_use, container_execution_time: e.execution_duration)
    rescue Runner::Error::RunnerNotFound => e
      Rails.logger.debug { "Running command `#{command}` failed for the first time: #{e.message}" }
      try += 1
@@ -166,6 +178,27 @@ class Runner < ApplicationRecord

  def destroy_at_management
    @strategy.destroy_at_management
+    update!(runner_id: nil, reserved_until: nil)
+  end
+
+  def reserve!
+    with_lock do
+      if reserved_until.present? && reserved_until > Time.zone.now
+        @error = Runner::Error::RunnerInUse.new("The desired Runner #{id} is already in use until #{reserved_until.iso8601}.")
+        raise @error
+      else
+        update!(reserved_until: Time.zone.now + execution_environment.permitted_execution_time.seconds)
+        @error = nil
+      end
+    end
+  end
+
+  def release!
+    return if @error.present?
+
+    with_lock do
+      update!(reserved_until: nil)
+    end
  end

  private
--- a/app/models/testrun.rb
+++ b/app/models/testrun.rb
@@ -14,6 +14,7 @@ class Testrun < ApplicationRecord
    timeout: 3,
    out_of_memory: 4,
    terminated_by_client: 5,
+    runner_in_use: 6,
  }, _default: :ok, _prefix: true

  validates :exit_code, numericality: {only_integer: true, min: 0, max: 255}, allow_nil: true