Merge pull request #1242 from openHPI/refactor_testrun_table

Refactor testrun table
2022-05-04 00:25:08 +02:00
parent 2ce9c4258a 4ef7da839d
commit e4384d7ddd
19 changed files with 507 additions and 108 deletions
--- a/app/assets/javascripts/editor/editor.js.erb
+++ b/app/assets/javascripts/editor/editor.js.erb
@ -703,6 +703,8 @@ var CodeOceanEditor = {
            this.showTimeoutMessage();
        } else if (output.status === 'container_depleted') {
            this.showContainerDepletedMessage();
+        } else if (output.status === 'out_of_memory') {
+            this.showOutOfMemoryMessage();
        } else if (output.stderr) {
            $.flash.danger({
                icon: ['fa', 'fa-bug'],
--- a/app/assets/javascripts/editor/execution.js
+++ b/app/assets/javascripts/editor/execution.js
@ -46,8 +46,6 @@ CodeOceanEditorWebsocket = {
    this.websocket.on('turtlebatch', this.handleTurtlebatchCommand.bind(this));
    this.websocket.on('render', this.renderWebsocketOutput.bind(this));
    this.websocket.on('exit', this.handleExitCommand.bind(this));
-    this.websocket.on('timeout', this.showTimeoutMessage.bind(this));
-    this.websocket.on('out_of_memory', this.showOutOfMemoryMessage.bind(this));
    this.websocket.on('status', this.showStatus.bind(this));
    this.websocket.on('hint', this.showHint.bind(this));
  },
--- a/app/controllers/execution_environments_controller.rb
+++ b/app/controllers/execution_environments_controller.rb
@ -30,7 +30,7 @@ class ExecutionEnvironmentsController < ApplicationController
  def execute_command
    runner = Runner.for(current_user, @execution_environment)
    output = runner.execute_command(params[:command], raise_exception: false)
-    render json: output
+    render json: output.except(:messages)
  end

  def working_time_query
--- a/app/controllers/submissions_controller.rb
+++ b/app/controllers/submissions_controller.rb
@ -9,6 +9,7 @@ class SubmissionsController < ApplicationController

  before_action :require_user!
  before_action :set_submission, only: %i[download download_file render_file run score show statistics test]
+  before_action :set_testrun, only: %i[run score test]
  before_action :set_files, only: %i[download show]
  before_action :set_files_and_specific_file, only: %i[download_file render_file run test]
  before_action :set_mime_type, only: %i[download_file render_file]
@ -92,6 +93,7 @@ class SubmissionsController < ApplicationController

      client_socket.onclose do |_event|
        runner_socket&.close(:terminated_by_client)
+        # We do not update the @testrun[:status] by design, it would be missleading
      end

      client_socket.onmessage do |raw_event|
@ -100,9 +102,17 @@ class SubmissionsController < ApplicationController

        # Otherwise, we expect to receive a JSON: Parsing.
        event = JSON.parse(raw_event).deep_symbolize_keys
+        event[:cmd] = event[:cmd].to_sym
+        event[:stream] = event[:stream].to_sym if event.key? :stream

-        case event[:cmd].to_sym
+        # We could store the received event. However, it is also echoed by the container
+        # and correctly identified as the original input. Therefore, we don't store
+        # it here to prevent duplicated events.
+        # @testrun[:messages].push(event)
+
+        case event[:cmd]
          when :client_kill
+            @testrun[:status] = :terminated_by_client
            close_client_connection(client_socket)
            Rails.logger.debug('Client exited container.')
          when :result, :canvasevent, :exception
@ -128,62 +138,61 @@ class SubmissionsController < ApplicationController
      end
    end

-    @output = +''
-    durations = @submission.run(@file) do |socket|
+    @testrun[:output] = +''
+    durations = @submission.run(@file) do |socket, starting_time|
      runner_socket = socket
+      @testrun[:starting_time] = starting_time
      client_socket.send_data JSON.dump({cmd: :status, status: :container_running})

      runner_socket.on :stdout do |data|
-        json_data = prepare data, :stdout
-        @output << json_data[0, max_output_buffer_size - @output.size]
-        client_socket.send_data(json_data)
+        message = retrieve_message_from_output data, :stdout
+        @testrun[:output] << message[:data][0, max_output_buffer_size - @testrun[:output].size] if message[:data]
+        send_and_store client_socket, message
      end

      runner_socket.on :stderr do |data|
-        json_data = prepare data, :stderr
-        @output << json_data[0, max_output_buffer_size - @output.size]
-        client_socket.send_data(json_data)
+        message = retrieve_message_from_output data, :stderr
+        @testrun[:output] << message[:data][0, max_output_buffer_size - @testrun[:output].size] if message[:data]
+        send_and_store client_socket, message
      end

      runner_socket.on :exit do |exit_code|
-        @exit_code = exit_code
+        @testrun[:exit_code] = exit_code
        exit_statement =
-          if @output.empty? && exit_code.zero?
-            @status = :ok
+          if @testrun[:output].empty? && exit_code.zero?
+            @testrun[:status] = :ok
            t('exercises.implement.no_output_exit_successful', timestamp: l(Time.zone.now, format: :short), exit_code: exit_code)
-          elsif @output.empty?
-            @status = :failed
+          elsif @testrun[:output].empty?
+            @testrun[:status] = :failed
            t('exercises.implement.no_output_exit_failure', timestamp: l(Time.zone.now, format: :short), exit_code: exit_code)
          elsif exit_code.zero?
-            @status = :ok
+            @testrun[:status] = :ok
            "\n#{t('exercises.implement.exit_successful', timestamp: l(Time.zone.now, format: :short), exit_code: exit_code)}"
          else
-            @status = :failed
+            @testrun[:status] = :failed
            "\n#{t('exercises.implement.exit_failure', timestamp: l(Time.zone.now, format: :short), exit_code: exit_code)}"
          end
-        client_socket.send_data JSON.dump({cmd: :write, stream: :stdout, data: "#{exit_statement}\n"})
+        send_and_store client_socket, {cmd: :write, stream: :stdout, data: "#{exit_statement}\n"}
        if exit_code == 137
-          client_socket.send_data JSON.dump({cmd: :out_of_memory})
-          @status = :out_of_memory
+          send_and_store client_socket, {cmd: :status, status: :out_of_memory}
+          @testrun[:status] = :out_of_memory
        end

        close_client_connection(client_socket)
      end
    end
-    @container_execution_time = durations[:execution_duration]
-    @waiting_for_container_time = durations[:waiting_duration]
+    @testrun[:container_execution_time] = durations[:execution_duration]
+    @testrun[:waiting_for_container_time] = durations[:waiting_duration]
  rescue Runner::Error::ExecutionTimeout => e
-    client_socket.send_data JSON.dump({cmd: :status, status: :timeout})
+    send_and_store client_socket, {cmd: :status, status: :timeout}
    close_client_connection(client_socket)
    Rails.logger.debug { "Running a submission timed out: #{e.message}" }
-    @output = "timeout: #{@output}"
-    @status = :timeout
+    @testrun[:output] = "timeout: #{@testrun[:output]}"
    extract_durations(e)
  rescue Runner::Error => e
-    client_socket.send_data JSON.dump({cmd: :status, status: :container_depleted})
+    send_and_store client_socket, {cmd: :status, status: :container_depleted}
    close_client_connection(client_socket)
    Rails.logger.debug { "Runner error while running a submission: #{e.message}" }
-    @status = :container_depleted
    extract_durations(e)
  ensure
    save_testrun_output 'run'
@ -195,17 +204,17 @@ class SubmissionsController < ApplicationController
        switch_locale do
          kill_client_socket(tubesock) if @embed_options[:disable_score]

+          # The score is stored separately, we can forward it to the client immediately
          tubesock.send_data(JSON.dump(@submission.calculate_score))
          # To enable hints when scoring a submission, uncomment the next line:
          # send_hints(tubesock, StructuredError.where(submission: @submission))
          kill_client_socket(tubesock)
        rescue Runner::Error => e
-          tubesock.send_data JSON.dump({cmd: :status, status: :container_depleted})
+          extract_durations(e)
+          send_and_store tubesock, {cmd: :status, status: :container_depleted}
          kill_client_socket(tubesock)
          Rails.logger.debug { "Runner error while scoring submission #{@submission.id}: #{e.message}" }
-          @passed = false
-          @status = :container_depleted
-          extract_durations(e)
+          @testrun[:passed] = false
          save_testrun_output 'assess'
        end
      end
@ -222,15 +231,15 @@ class SubmissionsController < ApplicationController
        switch_locale do
          kill_client_socket(tubesock) if @embed_options[:disable_run]

+          # The score is stored separately, we can forward it to the client immediately
          tubesock.send_data(JSON.dump(@submission.test(@file)))
          kill_client_socket(tubesock)
        rescue Runner::Error => e
-          tubesock.send_data JSON.dump({cmd: :status, status: :container_depleted})
+          extract_durations(e)
+          send_and_store tubesock, {cmd: :status, status: :container_depleted}
          kill_client_socket(tubesock)
          Rails.logger.debug { "Runner error while testing submission #{@submission.id}: #{e.message}" }
-          @passed = false
-          @status = :container_depleted
-          extract_durations(e)
+          @testrun[:passed] = false
          save_testrun_output 'assess'
        end
      end
@ -251,6 +260,7 @@ class SubmissionsController < ApplicationController
  end

  def kill_client_socket(client_socket)
+    # We don't want to store this (arbitrary) exit command and redirect it ourselves
    client_socket.send_data JSON.dump({cmd: :exit})
    client_socket.close
  end
@ -279,21 +289,29 @@ class SubmissionsController < ApplicationController
  end

  def extract_durations(error)
-    @container_execution_time = error.execution_duration
-    @waiting_for_container_time = error.waiting_duration
+    @testrun[:starting_time] = error.starting_time
+    @testrun[:container_execution_time] = error.execution_duration
+    @testrun[:waiting_for_container_time] = error.waiting_duration
  end

  def extract_errors
    results = []
-    if @output.present?
+    if @testrun[:output].present?
      @submission.exercise.execution_environment.error_templates.each do |template|
        pattern = Regexp.new(template.signature).freeze
-        results << StructuredError.create_from_template(template, @output, @submission) if pattern.match(@output)
+        results << StructuredError.create_from_template(template, @testrun[:output], @submission) if pattern.match(@testrun[:output])
      end
    end
    results
  end

+  def send_and_store(client_socket, message)
+    message[:timestamp] = ActiveSupport::Duration.build(Time.zone.now - @testrun[:starting_time])
+    @testrun[:messages].push message
+    @testrun[:status] = message[:status] if message[:status]
+    client_socket.send_data JSON.dump(message)
+  end
+
  def max_output_buffer_size
    if @submission.cause == 'requestComments'
      5000
@ -302,14 +320,6 @@ class SubmissionsController < ApplicationController
    end
  end

-  def prepare(data, stream)
-    if valid_command? data
-      data
-    else
-      JSON.dump({cmd: :write, stream: stream, data: data})
-    end
-  end
-
  def sanitize_filename
    params[:filename].gsub(/\.json$/, '')
  end
@ -318,15 +328,16 @@ class SubmissionsController < ApplicationController
  def save_testrun_output(cause)
    testrun = Testrun.create!(
      file: @file,
-      passed: @passed,
+      passed: @testrun[:passed],
      cause: cause,
      submission: @submission,
-      exit_code: @exit_code, # might be nil, e.g., when the run did not finish
-      status: @status,
-      output: @output.presence, # TODO: Remove duplicated saving of the output after creating TestrunMessages
-      container_execution_time: @container_execution_time,
-      waiting_for_container_time: @waiting_for_container_time
+      exit_code: @testrun[:exit_code], # might be nil, e.g., when the run did not finish
+      status: @testrun[:status],
+      output: @testrun[:output].presence, # TODO: Remove duplicated saving of the output after creating TestrunMessages
+      container_execution_time: @testrun[:container_execution_time],
+      waiting_for_container_time: @testrun[:waiting_for_container_time]
    )
+    TestrunMessage.create_for(testrun, @testrun[:messages])
    TestrunExecutionEnvironment.create(testrun: testrun, execution_environment: @submission.used_execution_environment)
  end

@ -335,7 +346,7 @@ class SubmissionsController < ApplicationController

    errors = errors.to_a.uniq(&:hint)
    errors.each do |error|
-      tubesock.send_data JSON.dump({cmd: 'hint', hint: error.hint, description: error.error_template.description})
+      send_and_store tubesock, {cmd: :hint, hint: error.hint, description: error.error_template.description}
    end
  end

@ -361,10 +372,26 @@ class SubmissionsController < ApplicationController
    authorize!
  end

-  def valid_command?(data)
+  def set_testrun
+    @testrun = {
+      messages: [],
+      exit_code: nil,
+      status: nil,
+    }
+  end
+
+  def retrieve_message_from_output(data, stream)
    parsed = JSON.parse(data)
-    parsed.instance_of?(Hash) && parsed.key?('cmd')
+    if parsed.instance_of?(Hash) && parsed.key?('cmd')
+      parsed.symbolize_keys!
+      # Symbolize two values if present
+      parsed[:cmd] = parsed[:cmd].to_sym
+      parsed[:stream] = parsed[:stream].to_sym if parsed.key? :stream
+      parsed
+    else
+      {cmd: :write, stream: stream, data: data}
+    end
  rescue JSON::ParserError
-    false
+    {cmd: :write, stream: stream, data: data}
  end
 end
--- a/app/errors/runner/error.rb
+++ b/app/errors/runner/error.rb
@ -2,7 +2,7 @@

 class Runner
  class Error < ApplicationError
-    attr_accessor :waiting_duration, :execution_duration
+    attr_accessor :waiting_duration, :execution_duration, :starting_time

    class BadRequest < Error; end

--- a/app/models/application_record.rb
+++ b/app/models/application_record.rb
@ -8,7 +8,8 @@ class ApplicationRecord < ActiveRecord::Base
  def strip_strings
    # trim whitespace from beginning and end of string attributes
    # except for the `content` of CodeOcean::Files
-    attribute_names.without('content').each do |name|
+    # and except the `log` of TestrunMessages or the `output` of Testruns
+    attribute_names.without('content', 'log', 'output').each do |name|
      if send(name.to_sym).respond_to?(:strip)
        send("#{name}=".to_sym, send(name).strip)
      end
--- a/app/models/runner.rb
+++ b/app/models/runner.rb
@ -62,10 +62,11 @@ class Runner < ApplicationRecord
      # initializing its Runner::Connection with the given event loop. The Runner::Connection class ensures that
      # this event loop is stopped after the socket was closed.
      event_loop = Runner::EventLoop.new
-      socket = @strategy.attach_to_execution(command, event_loop, &block)
+      socket = @strategy.attach_to_execution(command, event_loop, starting_time, &block)
      event_loop.wait
      raise socket.error if socket.error.present?
    rescue Runner::Error => e
+      e.starting_time = starting_time
      e.execution_duration = Time.zone.now - starting_time
      raise
    end
@ -74,30 +75,34 @@ class Runner < ApplicationRecord
  end

  def execute_command(command, raise_exception: true)
-    output = {}
-    stdout = +''
-    stderr = +''
+    output = {
+      stdout: +'',
+      stderr: +'',
+      messages: [],
+      exit_code: 1, # default to error
+    }
    try = 0

-    exit_code = 1 # default to error
    begin
      if try.nonzero?
        request_new_id
        save
      end

-      execution_time = attach_to_execution(command) do |socket|
+      execution_time = attach_to_execution(command) do |socket, starting_time|
        socket.on :stderr do |data|
-          stderr << data
+          output[:stderr] << data
+          output[:messages].push({cmd: :write, stream: :stderr, log: data, timestamp: Time.zone.now - starting_time})
        end
        socket.on :stdout do |data|
-          stdout << data
+          output[:stdout] << data
+          output[:messages].push({cmd: :write, stream: :stdout, log: data, timestamp: Time.zone.now - starting_time})
        end
        socket.on :exit do |received_exit_code|
-          exit_code = received_exit_code
+          output[:exit_code] = received_exit_code
        end
      end
-      output.merge!(container_execution_time: execution_time, status: exit_code.zero? ? :ok : :failed)
+      output.merge!(container_execution_time: execution_time, status: output[:exit_code].zero? ? :ok : :failed)
    rescue Runner::Error::ExecutionTimeout => e
      Rails.logger.debug { "Running command `#{command}` timed out: #{e.message}" }
      output.merge!(status: :timeout, container_execution_time: e.execution_duration)
@ -122,8 +127,7 @@ class Runner < ApplicationRecord
      raise e if raise_exception && defined?(e) && e.present?

      # If the process was killed with SIGKILL, it is most likely that the OOM killer was triggered.
-      output[:status] = :out_of_memory if exit_code == 137
-      output.merge!(stdout: stdout, stderr: stderr, exit_code: exit_code)
+      output[:status] = :out_of_memory if output[:exit_code] == 137
    end
  end

--- a/app/models/submission.rb
+++ b/app/models/submission.rb
@ -257,6 +257,7 @@ class Submission < ApplicationRecord
      container_execution_time: output[:container_execution_time],
      waiting_for_container_time: output[:waiting_for_container_time]
    )
+    TestrunMessage.create_for(testrun, output[:messages])
    TestrunExecutionEnvironment.create(testrun: testrun, execution_environment: @used_execution_environment)

    filename = file.filepath
@ -271,6 +272,7 @@ class Submission < ApplicationRecord

    output.merge!(assessment)
    output.merge!(filename: filename, message: feedback_message(file, output), weight: file.weight)
+    output.except!(:messages)
  end

  def feedback_message(file, output)
--- a/app/models/testrun.rb
+++ b/app/models/testrun.rb
@ -12,8 +12,13 @@ class Testrun < ApplicationRecord
    container_depleted: 2,
    timeout: 3,
    out_of_memory: 4,
+    terminated_by_client: 5,
  }, _default: :ok, _prefix: true

  validates :exit_code, numericality: {only_integer: true, min: 0, max: 255}, allow_nil: true
  validates :status, presence: true
+
+  def log
+    testrun_messages.output.select(:log).map(&:log).join.presence
+  end
 end
--- a/app/models/testrun_message.rb
+++ b/app/models/testrun_message.rb
@ -11,13 +11,14 @@ class TestrunMessage < ApplicationRecord
    turtlebatch: 4,
    render: 5,
    exit: 6,
-    timeout: 7,
-    out_of_memory: 8,
-    status: 9,
-    hint: 10,
-    client_kill: 11,
-    exception: 12,
-    result: 13,
+    status: 7,
+    hint: 8,
+    client_kill: 9,
+    exception: 10,
+    result: 11,
+    canvasevent: 12,
+    timeout: 13, # TODO: Shouldn't be in the data, this is a status and can be removed after the migration finished
+    out_of_memory: 14, # TODO: Shouldn't be in the data, this is a status and can be removed after the migration finished
  }, _default: :write, _prefix: true

  enum stream: {
@ -28,9 +29,62 @@ class TestrunMessage < ApplicationRecord

  validates :cmd, presence: true
  validates :timestamp, presence: true
-
+  validates :stream, length: {minimum: 0, allow_nil: false}, if: -> { cmd_write? }
+  validates :log, length: {minimum: 0, allow_nil: false}, if: -> { cmd_write? }
  validate :either_data_or_log

+  default_scope { order(timestamp: :asc) }
+  scope :output, -> { where(cmd: 1, stream: %i[stdout stderr]) }
+
+  def self.create_for(testrun, messages)
+    # We don't want to store anything if the testrun passed
+    return if testrun.passed?
+
+    messages.map! do |message|
+      # We create a new hash and move all known keys
+      result = {}
+      result[:testrun] = testrun
+      result[:log] = (message.delete(:log) || message.delete(:data)) if message[:cmd] == :write || message.key?(:log)
+      result[:timestamp] = message.delete :timestamp
+      result[:stream] = message.delete :stream if message.key?(:stream)
+      result[:cmd] = message.delete :cmd
+      # The remaining keys will be stored in the `data` column
+      result[:data] = message.presence if message.present?
+      result
+    end
+
+    # Before storing all messages, we truncate some to save storage
+    filtered_messages = filter_messages_by_size testrun, messages
+
+    # An array with hashes is passed, all are stored
+    TestrunMessage.create!(filtered_messages)
+  end
+
+  def self.filter_messages_by_size(testrun, messages)
+    limits = if testrun.submission.cause == 'requestComments'
+               {data: {limit: 25, size: 0}, log: {limit: 5000, size: 0}}
+             else
+               {data: {limit: 10, size: 0}, log: {limit: 500, size: 0}}
+             end
+
+    filtered_messages = messages.map do |message|
+      if message.key?(:log) && limits[:log][:size] < limits[:log][:limit]
+        message[:log] = message[:log][0, limits[:log][:limit] - limits[:log][:size]]
+        limits[:log][:size] += message[:log].size
+      elsif message[:data] && limits[:data][:size] < limits[:data][:limit]
+        limits[:data][:size] += 1
+      elsif !message.key?(:log) && limits[:data][:size] < limits[:data][:limit]
+        # Accept short TestrunMessages (e.g. just transporting a status information)
+        # without increasing the `limits[:data][:limit]` before the limit is reached
+      else
+        # Clear all remaining messages
+        message = nil
+      end
+      message
+    end
+    filtered_messages.select(&:present?)
+  end
+
  def either_data_or_log
    if [data, log].count(&:present?) > 1
      errors.add(log, "can't be present if data is also present")
--- a/app/views/exercises/external_users/statistics.html.slim
+++ b/app/views/exercises/external_users/statistics.html.slim
@ -65,9 +65,9 @@ h1
                td.align-middle
                  -this.testruns.includes(:file).order("files.name").each do |run|
                    - if run.passed
-                      .unit-test-result.positive-result title=[run.file&.filepath, run.output].join("\n").strip
+                      .unit-test-result.positive-result title=[run.file&.filepath, run.log].join.strip
                    - else
-                      .unit-test-result.unknown-result title=[run.file&.filepath, run.output].join("\n").strip
+                      .unit-test-result.unknown-result title=[run.file&.filepath, run.log].join.strip
                td = @working_times_until[index] if index > 0 if policy(@exercise).detailed_statistics?
              - elsif this.is_a? UserExerciseIntervention
                td = this.created_at.strftime("%F %T")
--- a/app/views/request_for_comments/show.html.slim
+++ b/app/views/request_for_comments/show.html.slim
@ -38,20 +38,7 @@
          .collapsed.testrun-output.text
            span.fa.fa-chevron-down.collapse-button
            - output_runs.each do |testrun|
-              - output = testrun.try(:output)
-              - if output
-                - Sentry.set_extras(output: output)
-                - begin
-                  - Timeout::timeout(2) do
-                    // (?:\\"|.) is required to correctly identify " within the output.
-                    // The outer (?:   |\d+?) is used to correctly identify integers within the JSON
-                    - messages = output.scan(/{(?:(?:"(?:\\"|.)+?":(?:"(?:\\"|.)*?"|-?\d+?|\[.*?\]|null))+?,?)+}/)
-                    - messages.map! {|el| JSON.parse(el)}
-                    - messages.keep_if {|message| message['cmd'] == 'write'}
-                    - messages.map! {|message| message['data']}
-                    - output = messages.join ''
-                - rescue Timeout::Error
-              pre= output or t('request_for_comments.no_output')
+              pre= testrun.log or t('request_for_comments.no_output')

        - assess_runs = testruns.select {|run| run.cause == 'assess' }
        - unless @current_user.admin?
@ -64,7 +51,7 @@
                div class=("result #{testrun.passed ? 'passed' : 'failed'}")
                .collapsed.testrun-output.text
                  span.fa.fa-chevron-down.collapse-button
-                  pre= testrun.output or t('request_for_comments.no_output')
+                  pre= testrun.log or t('request_for_comments.no_output')

    - if @current_user.admin? && user.is_a?(ExternalUser)
      = render('admin_menu')
--- a/db/migrate/20220415215112_migrate_testruns.rb
+++ b/db/migrate/20220415215112_migrate_testruns.rb
@ -0,0 +1,317 @@
+# frozen_string_literal: true
+
+class MigrateTestruns < ActiveRecord::Migration[6.1]
+  # We are not changing any tables but only backfilling data.
+  disable_ddl_transaction!
+
+  SPLIT_OUTPUT = Regexp.compile(/(?<meta>message: (?<message>.*)\n|status: (?<status>.*)\n)? stdout: (?<stdout>.*)\n stderr: ?(?<stderr>.*)/m)
+  PYTHON_BYTE_OUTPUT = Regexp.compile(/^b'(?<raw_output>.*)'$/)
+  PYTHON_JSON_OUTPUT = Regexp.compile(/{"cmd":"write","stream":"(?<stream>.*)","data":"(?<data_output>.*)"}/)
+  RUN_OUTPUT = Regexp.compile(%r{(?<prefix>timeout:)? ?(?>make run\r\n)?(?>python3 /usr/lib/[^\r\n]*\r\n|/usr/bin/python3[^\r\n]*\r\n|ruby [^\r\n]*\r\n)?(?<cleaned_output>[^ "\e][^\e]*?[^#\e])?(?<shell>\r\e.*?)?#?(?<suffix>exit|timeout)?\r?\Z}m)
+  REAL_EXIT = Regexp.compile(/\A(?>(?<json>(?<json_output>{".*?)?(?>{"cmd":(?> |"write","stream":"stdout","data":)?"#?exit(?>\\[nr])?"})+(?<more_shell_output_after_json>.*))|(?<program_output>.*?)(?>#?exit\s*)+(?<more_shell_output_after_program>.*))\z/m)
+  STDERR_WRITTEN = Regexp.compile(/^(?:(?<rb_error>\r*[^\n\r]*\.rb:\d+:.*)|(?<other_error>\r*[^\n\r]*\.java:\d+: error.*|\r*Exception in thread.*|\r*There was .*|\r*[^\n\r]*java\.lang\..*|\r*make: \*\*\* \[.*))\z/m)
+  FIND_JSON = Regexp.compile(/{(?:(?:"(?:\\.|[^\\"])+?"\s*:\s*(?:"(?:\\.|[^\\"])*?"|-?\d++(?:\.\d++)?|\[.*?\]|{.*?}|null))+?\s*,?\s*)+}/)
+  REPLACE_INCOMPLETE_UNICODE = Regexp.compile(/(?:\\?\\u[\da-f]{0,3}|\\?\\u\{[\da-f]{0,4})"}\z/)
+
+  # NOTE: `update_columns` won't run validations nor update the `updated_at` timestamp.
+  # This is what we want here, thus we disable Rubocop for this migration.
+  # rubocop:disable Rails/SkipsModelValidations
+  def up
+    ActiveRecord::Base.transaction do
+      migrate_cause
+      migrate_messages
+    end
+  end
+
+  private
+
+  def migrate_cause
+    Rails.logger.info 'Unifying `cause` for multiple Testruns and Submissions. This might take a while...'
+
+    # Our database contains various causes: "assess, "remoteAssess", "run", "submit"
+    # As of 2022, we only differentiate between a "run" and a "assess" execution
+    # Other values were never stored programmatically but added
+    # with the `20170830083601_add_cause_to_testruns.rb` migration.
+    cause_migration = {
+      # old_value => new _value
+      'remoteAssess' => 'assess',
+      'submit' => 'assess',
+    }
+
+    Testrun.where(cause: cause_migration.keys).find_each do |testrun|
+      # Ensure that the submission has the correct cause
+      testrun.submission.update_columns(cause: testrun.cause)
+
+      # Update the testrun with the new cause
+      testrun.update_columns(cause: cause_migration[testrun.cause])
+    end
+  end
+
+  def migrate_messages
+    Rails.logger.info 'Migrating Testrun to TestrunMessages using RegEx. This will take a very long time...'
+
+    Testrun.find_each do |testrun|
+      result = case testrun.passed
+                 when true
+                   migrate_successful_score_run(testrun)
+                 when false
+                   migrate_failed_score_run(testrun)
+                 else
+                   # The "testrun" is actually a "run" (as stored in `cause`)
+                   migrate_code_execution(testrun)
+               end
+
+      testrun.update_columns(result.slice(:exit_code, :status))
+    end
+  end
+
+  def migrate_successful_score_run(testrun)
+    # If the testrun passed, we (typically) won't have any output.
+    # Thus, we assume that the assessment exited with 0 successfully
+    result = {exit_code: 0, status: :ok}
+    stdout, stderr = nil
+
+    if testrun.output&.match(PYTHON_BYTE_OUTPUT)
+      # Some runs until 2016-04-13 have (useless) output. We could remove them but keep them for now
+      #
+      # The output is manually converted in a dumped ruby string (from Python) and undumped.
+      # All Python test output is printed on STDERR, even for successful code runs.
+      dumped_raw_output = Regexp.last_match(:raw_output)&.gsub(/"/, '\\"')
+      stderr = "\"#{dumped_raw_output}\"".undump
+      # There is no other output format present in the database (checked manually), so nothing for `else`
+    end
+
+    store_stdout_stderr(testrun, stdout, stderr)
+    result
+  end
+
+  def migrate_failed_score_run(testrun)
+    # This score run was not successful. We set some defaults and search for more details
+    result = {exit_code: 1, status: :failed}
+    stdout, stderr = nil
+
+    case testrun.output
+      when SPLIT_OUTPUT
+        # Output has well-known format. Let's split it and store it in dedicated fields
+
+        # `status` is one of :ok, :failed, :container_depleted, :timeout, :out_of_memory
+        # `message` (see RegEx) was prefixed for some time and always contained no value (checked manually)
+        result[:status] = Regexp.last_match(:status)&.to_sym || result[:status]
+        stdout = Regexp.last_match(:stdout)&.presence
+        stderr = Regexp.last_match(:stderr)&.presence
+      when PYTHON_BYTE_OUTPUT
+        # The output is manually converted in a dumped ruby string (from Python) and undumped
+        dumped_raw_output = Regexp.last_match(:raw_output)&.gsub(/"/, '\\"')
+        stderr = "\"#{dumped_raw_output}\"".undump
+      when PYTHON_JSON_OUTPUT
+        # A very few (N=2) assess runs contain a single raw JSON message.
+        # To be sure, we grep the stream and data here to store it later again.
+        if Regexp.last_match(:stream) == 'stdout'
+          stdout = Regexp.last_match(:data_output)
+        else
+          stderr = Regexp.last_match(:data_output)
+        end
+      else
+        stderr = testrun.output.presence
+    end
+
+    # If possible, we try to infer whether this run used make (exit code 2) or not (exit code 1)
+    get_exit_code_from_stderr(stderr&.match(STDERR_WRITTEN), result)
+
+    store_stdout_stderr(testrun, stdout, stderr)
+    result
+  end
+
+  def migrate_code_execution(testrun)
+    # The `output` variable is modified several times throughout this script.
+    # Thus, we make a copy and modify it to remove control information, shell command lines,
+    # and any other unwanted strings that are not part of the program execution.
+    output = testrun.output
+
+    # A reference to the `result` variable is passed to each processing method and modified there
+    # Order of `status` interpretation: `Failure` before `Exit` before `timeout` before `ok`
+    result = {status: :ok, exit_code: 0}
+
+    output = code_execution_trim(output, result)
+    output = code_execution_search_for_exit(output, result)
+    # Now, we either know that
+    # - the program terminated (status = :ok) or
+    # - the execution timed-out (status = :timeout) or
+    # - that the information is not present in the data (e.g., because it was truncated). In this case, we need
+    #   to assume successful termination. Further "guessing" is performed below based on the output.
+
+    code_execution_process_json(testrun, output, result)
+
+    # If we found JSON input, we're done and can continue with saving our changes.
+    # Otherwise, we need to continue here...
+    code_execution_process_raw(testrun, output, result) unless result[:json_output]
+
+    result
+  end
+
+  def code_execution_trim(output, result)
+    # First, we apply some pre-processing:
+    # - Identify `timeout: ` and `timeout:` prefixes
+    # - Identify `#exit`, `#timeout` suffixes (with optional # and \n)
+    # - Clean remaining output and remove trailing "make run", "python3 /usr/lib/...", "/usr/bin/python3 ...", and "ruby ..." lines.
+    # - Additionally, (multiple) trailing " (from Python) are removed so that these start with {" (a usual JSON)
+    # - Also, remove any shell output by identifying \e (ESC) - filter checked manually
+    pre_processing = output&.match(RUN_OUTPUT)
+
+    if pre_processing.present?
+      # The `prefix` might only be `timeout:`. We use that.
+      result[:status] = :timeout if pre_processing[:prefix] == 'timeout:'
+
+      # The `suffix` might be `timeout` or `exit`.
+      # As sometimes the execution was not identified as `exited`, a `timeout` was reached.
+      # Here, we want to "restore" the original status: If the execution `exited`, we ignore the `timeout`.
+      result[:status] = :timeout if pre_processing[:suffix] == 'timeout'
+      result[:status] = :ok if pre_processing[:suffix] == 'exit'
+
+      # For further processing, we continue using our cleaned_output
+      output = pre_processing[:cleaned_output]
+
+      # Other shell output is ignored by design.
+      # pre_processing[:shell]
+    end
+
+    output
+  end
+
+  def code_execution_search_for_exit(output, result)
+    # Second, we check for (another) presence of an `exit`.
+    # This time, we consider the following variances:
+    # - {"cmd": "exit"}
+    # - {"cmd":"exit"}
+    # - #exit
+    # - exit
+    # The text until the first `exit` is recognized will be treated as the original output
+    # Any text that is included after the last `exit` is considered as further shell output (and thus ignored).
+    search_for_exit = output&.match(REAL_EXIT)
+
+    # If we find an `exit` identifier, we also know whether JSON was processed or not.
+    # That information is stored for further processing. If not found, we don't assume JSON.
+    result[:json_output] = false
+
+    if search_for_exit.present? # Nothing matched, we don't have any exit code
+      output = search_for_exit[:json_output] || search_for_exit[:program_output]
+      result[:status] = :ok
+
+      # Check whether the first group with JSON data matched.
+      result[:json_output] = search_for_exit[:json]
+
+      # Other shell output is ignored by design.
+      # search_for_exit[:more_shell_output_after_json] || search_for_exit[:more_shell_output_after_program]
+    end
+
+    output
+  end
+
+  def code_execution_process_json(testrun, output, result)
+    # Third, we parse JSON messages and split those into their streams
+    # Before doing so, we try to close the last JSON message (which was probably cut off hard)
+    # Either, we need to end the current string with " and close the object with } or just close the object.
+    if output.present? && output.count('{') != output.count('}')
+      # Remove single trailing escape character
+      output.delete_suffix!('\\') if (output.ends_with?('\\') && !output.ends_with?('\\\\')) || (output.ends_with?('\\\\\\') && !output.ends_with?('\\\\\\\\'))
+      if (output.ends_with?('"') || output.ends_with?('{')) && !output.ends_with?('\"')
+        output += '}'
+      else
+        output += '"}'
+      end
+      # Remove incomplete unicode characters (usually \u0061) at the end of the JSON
+      output = output.gsub(REPLACE_INCOMPLETE_UNICODE, '"}')
+    end
+    # Then, we look for valid JSON parts and parse them.
+    unparsed_json_messages = output&.scan(FIND_JSON) || []
+    parsed_json_messages = unparsed_json_messages.map {|message| JSON.parse(message) }
+
+    parsed_json_messages.each_with_index do |json, index|
+      create_testrun_message(testrun, json, index + 1) # Index is starting with 0.
+
+      # It is very likely that any presence of stderr indicates an unsuccessful execution.
+      next unless json['stream'] == 'stderr'
+
+      result[:status] = :failed
+      result[:exit_code] = 1
+      # If possible, we try to infer whether this run used make (exit code 2) or not (exit code 1)
+      get_exit_code_from_stderr(json['data']&.match(STDERR_WRITTEN), result)
+    end
+
+    result[:json_output] = parsed_json_messages.present?
+  end
+
+  def code_execution_process_raw(testrun, output, result)
+    # Forth, we want to separate the remaining `output` into STDOUT and STDERR.
+    # In this step, we also infer whether the program exited normally or unexpectedly.
+    errors = output&.match(STDERR_WRITTEN) || {}
+    # Probably, only one of `rb_error` or `other_error` is filled. We want to be sure and concatenate both
+    stderr = "#{errors[:rb_error]}#{errors[:rb_error]}"
+    stdout = output&.delete_suffix(stderr) || output # Fallback to full output in case nothing matched
+
+    get_exit_code_from_stderr(errors, result)
+
+    store_stdout_stderr(testrun, stdout, stderr)
+    result
+  end
+
+  def get_exit_code_from_stderr(stderr_match, result)
+    # As the exit code is not recorded yet, we define some rules
+    # - An output containing ".java:<line No>: error" had a syntax error (Java)
+    # - An output containing "Exception in thread " or "java.lang." had a runtime exception (Java)
+    # - An output containing ".rb:<line No>:" also had some error (Ruby)
+    # - An output containing "make: *** [<target>] Error <int>" failed (somewhere)
+    # - (No dedicated search for R, JS, required [e.g., because of using make])
+    # In our use case, `make` either returns `0` for success or `2` for any error (regardless of the <int> shown).
+    # For others, we set the exit code to `1` (as done by Ruby or other interpreters)
+
+    return if stderr_match.nil?
+
+    if stderr_match[:rb_error].present?
+      # Ruby is used without make and normally exists with `1` in case of an error
+      result[:exit_code] = 1
+      result[:status] = :failed
+    elsif stderr_match[:other_error].present?
+      # `make` was used and the exit code was `2` (according to `man` page)
+      result[:exit_code] = 2
+      result[:status] = :failed
+    end
+  end
+
+  def store_stdout_stderr(testrun, stdout, stderr)
+    # Create two messages based on our split messages.
+    # We assume that (most likely) STDOUT was written before STDERR
+    order = 0 # Incremented before storing any value
+    create_testrun_message(testrun, {'cmd' => 'write', 'stream' => 'stdout', 'data' => stdout}, order += 1) if stdout.present?
+    create_testrun_message(testrun, {'cmd' => 'write', 'stream' => 'stderr', 'data' => stderr}, order + 1) if stderr.present?
+  end
+
+  def create_testrun_message(testrun, json, order)
+    # Using the string keys by design. Otherwise, we would need to call #symbolize_keys!
+
+    message = {
+      testrun: testrun,
+      cmd: json['cmd'],
+      # We cannot infer any timestamp and thus use arbitrary, distinct millisecond values (1s = 1000ms)
+      timestamp: ActiveSupport::Duration.build(order / 1000.0),
+      created_at: testrun.created_at,
+      updated_at: testrun.updated_at,
+    }
+
+    if json.key?('stream') && json.key?('data')
+      message[:stream] = json['stream']
+      message[:log] = json['data']
+    else
+      message[:data] = json.except('cmd').presence
+    end
+
+    begin
+      TestrunMessage.create(message)
+    rescue StandardError
+      # We ignore any error here. This probably indicates that the JSON recovered from the output was invalid
+      # An invalid JSON might be caused by our arbitrary truncation...
+    end
+  end
+
+  # rubocop:enable Rails/SkipsModelValidations
+end
--- a/db/schema.rb
+++ b/db/schema.rb
@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.

-ActiveRecord::Schema.define(version: 2022_04_15_215111) do
+ActiveRecord::Schema.define(version: 2022_04_15_215112) do

  # These are extensions that must be enabled in order to support this database
  enable_extension "pg_trgm"
--- a/lib/runner/strategy.rb
+++ b/lib/runner/strategy.rb
@ -33,7 +33,7 @@ class Runner::Strategy
    raise NotImplementedError
  end

-  def attach_to_execution(_command, _event_loop)
+  def attach_to_execution(_command, _event_loop, _starting_time)
    raise NotImplementedError
  end

--- a/lib/runner/strategy/docker_container_pool.rb
+++ b/lib/runner/strategy/docker_container_pool.rb
@ -108,7 +108,7 @@ class Runner::Strategy::DockerContainerPool < Runner::Strategy
    Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished copying files" }
  end

-  def attach_to_execution(command, event_loop)
+  def attach_to_execution(command, event_loop, starting_time)
    reset_inactivity_timer

    @command = command
@ -119,7 +119,7 @@ class Runner::Strategy::DockerContainerPool < Runner::Strategy
    begin
      Timeout.timeout(@execution_environment.permitted_execution_time) do
        socket.send_data(command)
-        yield(socket)
+        yield(socket, starting_time)
        event_loop.wait
        event_loop.stop
      end
--- a/lib/runner/strategy/null.rb
+++ b/lib/runner/strategy/null.rb
@ -25,11 +25,11 @@ class Runner::Strategy::Null < Runner::Strategy

  def copy_files(_files); end

-  def attach_to_execution(command, event_loop)
+  def attach_to_execution(command, event_loop, starting_time)
    socket = Connection.new(nil, self, event_loop)
    # We don't want to return an error if the execution environment is changed
    socket.status = :terminated_by_codeocean if command == ExecutionEnvironment::VALIDATION_COMMAND
-    yield(socket)
+    yield(socket, starting_time)
    socket
  end

--- a/lib/runner/strategy/poseidon.rb
+++ b/lib/runner/strategy/poseidon.rb
@ -133,10 +133,10 @@ class Runner::Strategy::Poseidon < Runner::Strategy
    Rails.logger.debug { "#{Time.zone.now.getutc.inspect}: Finished copying files" }
  end

-  def attach_to_execution(command, event_loop)
+  def attach_to_execution(command, event_loop, starting_time)
    websocket_url = execute_command(command)
    socket = Connection.new(websocket_url, self, event_loop)
-    yield(socket)
+    yield(socket, starting_time)
    socket
  end

--- a/spec/models/runner_spec.rb
+++ b/spec/models/runner_spec.rb
@ -115,10 +115,12 @@ describe Runner do
      end

      it 'attaches the execution time to the error' do
-        starting_time = Time.zone.now
+        test_starting_time = Time.zone.now
        expect { runner.attach_to_execution(command) }.to raise_error do |raised_error|
-          test_time = Time.zone.now - starting_time
+          test_time = Time.zone.now - test_starting_time
          expect(raised_error.execution_duration).to be_between(0.0, test_time).exclusive
+          # The `starting_time` is shortly after the `test_starting_time``
+          expect(raised_error.starting_time).to be > test_starting_time
        end
      end
    end