From 9b37ed3df0134443846c04cf02df6b4df7b0bab7 Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Mon, 23 Mar 2020 15:12:58 +0100 Subject: [PATCH 1/5] Increase log level for production --- config/environments/production.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/environments/production.rb b/config/environments/production.rb index 62ac0bb7..86b0fac5 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -58,7 +58,7 @@ Rails.application.configure do # Use the lowest log level to ensure availability of diagnostic information # when problems arise. - config.log_level = :info + config.log_level = :error # Prepend all log lines with the following tags. # config.log_tags = [ :subdomain, :uuid, :request_id ] From 25af28756344a3e07b82c2da4a2cb2c003f28083 Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Tue, 24 Mar 2020 18:05:28 +0100 Subject: [PATCH 2/5] Fix German translation --- config/locales/de.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/locales/de.yml b/config/locales/de.yml index cc234e95..3d847575 100644 --- a/config/locales/de.yml +++ b/config/locales/de.yml @@ -283,7 +283,7 @@ de: confirm_start_over_active_file: Wollen Sie wirklich den Inhalt der ausgewählten Datei '%{filename}' löschen? Diese Aktion kann nicht rückgängig gemacht werden. confirm_submit: Wollen Sie Ihren Code wirklich zur Bewertung abgeben? create_file: Neue Datei - depleted: Alle Ausführungsausgebungen sind momentan in Benutzung. Probiere es später nochmal. + depleted: Alle Ausführungsumgebungen sind momentan in Benutzung. Probiere es später nochmal. destroy_file: Datei löschen download: Herunterladen dummy: Keine Aktion From de06a83f4ca3f5d24127174b7edf310c7d73fb25 Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Tue, 24 Mar 2020 18:06:21 +0100 Subject: [PATCH 3/5] Add a few comments --- lib/docker_client.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/docker_client.rb b/lib/docker_client.rb index 14f5af51..fb18dac5 100644 --- a/lib/docker_client.rb +++ b/lib/docker_client.rb @@ -121,9 +121,6 @@ class DockerClient container.status = :created container.re_use = true - # TODO: - # - gerade benutzt? 15 Min -> Status setzen und prüfen (return beachten) -> 2 hart killen - # - schon weg? Thread.new do timeout = Random.rand(MINIMUM_CONTAINER_LIFETIME..MAXIMUM_CONTAINER_LIFETIME) # seconds sleep(timeout) @@ -302,6 +299,7 @@ class DockerClient if container.status != :returned Rails.logger.info('Killing container after timeout of ' + timeout.to_s + ' seconds.') # send timeout to the tubesock socket + # FIXME: 2nd thread to notify user. if @tubesock @tubesock.send_data JSON.dump({'cmd' => 'timeout'}) end @@ -416,6 +414,7 @@ class DockerClient end def self.initialize_environment + # TODO: Move to DockerContainerPool unless config[:connection_timeout] && config[:workspace_root] fail(Error, 'Docker configuration missing!') end From b0761dbdb5b2b5d4c44797ea554c42d2b0b83e5a Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Wed, 25 Mar 2020 11:30:11 +0100 Subject: [PATCH 4/5] Extract pool handling to separate service --- Gemfile.lock | 2 +- config/initializers/docker.rb | 3 +- lib/docker_client.rb | 28 +----- lib/docker_container_pool.rb | 158 ++-------------------------------- 4 files changed, 15 insertions(+), 176 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 3370f8f0..2457094e 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -485,4 +485,4 @@ DEPENDENCIES whenever BUNDLED WITH - 1.17.3 + 2.1.2 diff --git a/config/initializers/docker.rb b/config/initializers/docker.rb index 4e5a6cbe..200a2207 100644 --- a/config/initializers/docker.rb +++ b/config/initializers/docker.rb @@ -4,6 +4,5 @@ return if Rake.application.top_level_tasks.to_s.include?('db:') if ApplicationRecord.connection.tables.present? && DockerContainerPool.config[:active] - DockerContainerPool.start_refill_task - at_exit { DockerContainerPool.clean_up } + # no op end diff --git a/lib/docker_client.rb b/lib/docker_client.rb index fb18dac5..819cfa14 100644 --- a/lib/docker_client.rb +++ b/lib/docker_client.rb @@ -235,8 +235,10 @@ class DockerClient clean_container_workspace(container) # Checks only if container assignment is not nil and not whether the container itself is still present. - if container + if container && !DockerContainerPool.config[:active] container.delete(force: true, v: true) + elsif container + DockerContainerPool.destroy_container(container) end rescue Docker::Error::NotFoundError => error Rails.logger.error('destroy_container: Rescued from Docker::Error::NotFoundError: ' + error.to_s) @@ -339,29 +341,7 @@ class DockerClient def kill_container(container, create_new = true) exit_thread_if_alive Rails.logger.info('killing container ' + container.to_s) - # remove container from pool, then destroy it - if (DockerContainerPool.config[:active]) - DockerContainerPool.acquire_semaphore - DockerContainerPool.remove_from_all_containers(container, @execution_environment) - # create new container and add it to @all_containers and @containers. - # ToDo: How long does creating a new cotainer take? We're still locking the semaphore. - - missing_counter_count = @execution_environment.pool_size - DockerContainerPool.all_containers[@execution_environment.id].length - if missing_counter_count > 0 && create_new - Rails.logger.error('kill_container: Creating a new container.') - new_container = self.class.create_container(@execution_environment) - DockerContainerPool.add_to_all_containers(new_container, @execution_environment) - elsif !create_new - Rails.logger.error('Container killed and removed for ' + @execution_environment.to_s + ' but not creating a new one. Currently, ' + missing_counter_count.abs.to_s + ' more containers than the configured pool size are available.') - else - Rails.logger.error('Container killed and removed for ' + @execution_environment.to_s + ' but not creating a new one as per request. Currently, ' + missing_counter_count.to_s + ' containers are missing compared to the configured pool size are available. Negative number means they are too much containers') - end - DockerContainerPool.release_semaphore - end - - Thread.new do - self.class.destroy_container(container) - end + self.class.destroy_container(container) end def execute_run_command(submission, filename, &block) diff --git a/lib/docker_container_pool.rb b/lib/docker_container_pool.rb index b6855971..0df2fe8a 100644 --- a/lib/docker_container_pool.rb +++ b/lib/docker_container_pool.rb @@ -4,80 +4,11 @@ require 'concurrent/timer_task' class DockerContainerPool - # Always use a semaphore when dealing with write access to `@containers` or `@all_containers` or read access if that involves creating a new container - # Some methods have the `bypass_semaphore` flag which should be rarely used. It was introduced when adding the semaphore to the `replace_broken_container` method. - # When `replace_broken_container` was extended to check the total amount of containers first, the `bypass_semaphore` flag was also added to `get_container` and - # back to `replace_broken_container`. When the semaphore is not released, no container operations can be done! - # Furthermore, the semaphore is used when killing a container in the DockerClient. - @semaphore = Concurrent::Semaphore.new(1) - - @containers = Concurrent::Hash[ExecutionEnvironment.all.map { |execution_environment| [execution_environment.id, Concurrent::Array.new] }] - #as containers are not containing containers in use - @all_containers = Concurrent::Hash[ExecutionEnvironment.all.map { |execution_environment| [execution_environment.id, Concurrent::Array.new] }] - - def self.clean_up - Rails.logger.info('Container Pool is now performing a cleanup. ') - @refill_task.try(:shutdown) - @all_containers.values.each do |containers| - DockerClient.destroy_container(containers.shift) until containers.empty? - end - end - def self.config #TODO: Why erb? @config ||= CodeOcean::Config.new(:docker).read(erb: true)[:pool] end - def self.containers - @containers - end - - def self.all_containers - @all_containers - end - - def self.acquire_semaphore - return - Rails.logger.info("Semaphore - Acquire: Trying " + @semaphore.inspect.to_s + " for " + caller_locations(1, 1)[0].label) - got_semaphore = @semaphore.try_acquire(1, 20) - if got_semaphore - Rails.logger.info("Semaphore - Acquire: Got " + @semaphore.inspect.to_s + " for " + caller_locations(1, 1)[0].label) - else - Rails.logger.error("Semaphore - Acquire: Failed " + @semaphore.inspect.to_s + " for " + caller_locations(1, 1)[0].label) - end - end - - def self.release_semaphore - return - Rails.logger.info("Semaphore - Release: Trying " + @semaphore.inspect.to_s + " for " + caller_locations(1, 1)[0].label) - if @semaphore.available_permits < 1 - @semaphore.release - Rails.logger.info("Semaphore - Release: Done " + @semaphore.inspect.to_s + " for " + caller_locations(1, 1)[0].label) - else - Rails.logger.info("Semaphore - Release: Failed " + @semaphore.inspect.to_s + " for " + caller_locations(1, 1)[0].label) - end - end - - def self.remove_from_all_containers(container, execution_environment) - if @containers[execution_environment.id].include?(container) - @containers[execution_environment.id].delete(container) - Rails.logger.debug('Removed container ' + container.to_s + ' from available_pool for execution environment ' + execution_environment.to_s + '. Remaining containers in available_pool ' + @containers[execution_environment.id].size.to_s) - end - - @all_containers[execution_environment.id].delete(container) - Rails.logger.debug('Removed container ' + container.to_s + ' from all_pool for execution environment ' + execution_environment.to_s + '. Remaining containers in all_pool ' + @all_containers[execution_environment.id].size.to_s) - end - - def self.add_to_all_containers(container, execution_environment) - @all_containers[execution_environment.id].push(container) - if !@containers[execution_environment.id].include?(container) - @containers[execution_environment.id].push(container) - #Rails.logger.debug('Added container ' + container.to_s + ' to all_pool for execution environment ' + execution_environment.to_s + '. Containers in all_pool: ' + @all_containers[execution_environment.id].size.to_s) - else - Rails.logger.error('failed trying to add existing container ' + container.to_s + ' to execution_environment ' + execution_environment.to_s) - end - end - def self.create_container(execution_environment) Rails.logger.info('trying to create container for execution environment: ' + execution_environment.to_s) container = DockerClient.create_container(execution_environment) @@ -87,100 +18,29 @@ class DockerContainerPool end def self.return_container(container, execution_environment) - container.status = 'available' # FIXME: String vs Symbol usage? - if @containers[execution_environment.id] && !@containers[execution_environment.id].include?(container) && container.re_use - @containers[execution_environment.id].push(container) - else - Rails.logger.error('trying to return existing container ' + container.to_s + ' to execution_environment ' + execution_environment.to_s) - end + Faraday.get(config[:location] + "/docker_container_pool/return_container/" + container.id) end - def self.get_container(execution_environment, bypass_semaphore: false) + def self.get_container(execution_environment) # if pooling is active, do pooling, otherwise just create an container and return it if config[:active] - container = @containers[execution_environment.id].try(:shift) || nil - Rails.logger.debug('get_container fetched container ' + container.to_s + ' for execution environment ' + execution_environment.to_s) - # just access and the following if we got a container. Otherwise, the execution_environment might be just created and not fully exist yet. - if (container) - begin - # check whether the container is running. exited containers go to the else part. - # Dead containers raise a NotFOundError on the container.json call. This is handled in the rescue block. - if (container.json['State']['Running']) - Rails.logger.debug('get_container remaining avail. containers: ' + @containers[execution_environment.id].size.to_s) - Rails.logger.debug('get_container all container count: ' + @all_containers[execution_environment.id].size.to_s) - else - Rails.logger.error('docker_container_pool.get_container retrieved a container not running. Container will be removed from list: ' + container.to_s) - #TODO: check in which state the container actually is and treat it accordingly (dead,... => destroy?) - container = replace_broken_container(container, execution_environment, bypass_semaphore: bypass_semaphore) - end - rescue Docker::Error::NotFoundError => error - Rails.logger.error('docker_container_pool.get_container rescued from Docker::Error::NotFoundError. Most likely, the container is not there any longer. Removing faulty entry from list: ' + container.to_s) - container = replace_broken_container(container, execution_environment, bypass_semaphore: bypass_semaphore) - end - end - # returning nil is no problem. then the pool is just depleted. - container + container_id = JSON.parse(Faraday.get(config[:location] + "/docker_container_pool/get_container/" + execution_environment.id.to_s).body)['id'] + Docker::Container.get(container_id) unless container_id.blank? else create_container(execution_environment) end end - def self.replace_broken_container(container, execution_environment, bypass_semaphore: false) - # See note at the top for `bypass_semaphore` - acquire_semaphore unless bypass_semaphore - remove_from_all_containers(container, execution_environment) - missing_counter_count = execution_environment.pool_size - @all_containers[execution_environment.id].length - if missing_counter_count > 0 - Rails.logger.error('replace_broken_container: Creating a new container and returning that.') - new_container = create_container(execution_environment) - DockerContainerPool.add_to_all_containers(new_container, execution_environment) - else - Rails.logger.error('Broken container removed for ' + execution_environment.to_s + ' but not creating a new one. Currently, ' + missing_counter_count.abs.to_s + ' more containers than the configured pool size are available.') - new_container = get_container(execution_environment, bypass_semaphore: true) - end - release_semaphore unless bypass_semaphore - new_container + def self.destroy_container(container) + Faraday.get(config[:location] + "/docker_container_pool/destroy_container/" + container.id) end def self.quantities - @containers.map { |key, value| [key, value.length] }.to_h + response = JSON.parse(Faraday.get(config[:location] + "/docker_container_pool/quantities").body) + response.transform_keys(&:to_i) end def self.dump_info - { - process: $$, - containers: @containers.as_json, - all_containers: @all_containers.as_json - } - end - - def self.refill - ExecutionEnvironment.where('pool_size > 0').order(pool_size: :desc).each do |execution_environment| - if config[:refill][:async] - Concurrent::Future.execute { refill_for_execution_environment(execution_environment) } - else - refill_for_execution_environment(execution_environment) - end - end - end - - def self.refill_for_execution_environment(execution_environment) - acquire_semaphore - refill_count = [execution_environment.pool_size - @all_containers[execution_environment.id].length, config[:refill][:batch_size]].min - if refill_count > 0 - Rails.logger.info('Adding ' + refill_count.to_s + ' containers for execution_environment ' + execution_environment.name) - multiple_containers = refill_count.times.map { create_container(execution_environment) } - #Rails.logger.info('Created containers: ' + multiple_containers.to_s ) - @containers[execution_environment.id].concat(multiple_containers) - @all_containers[execution_environment.id].concat(multiple_containers) - #Rails.logger.debug('@containers for ' + execution_environment.name.to_s + ' (' + @containers.object_id.to_s + ') has the following content: '+ @containers[execution_environment.id].to_s) - #Rails.logger.debug('@all_containers for ' + execution_environment.name.to_s + ' (' + @all_containers.object_id.to_s + ') has the following content: ' + @all_containers[execution_environment.id].to_s) - end - release_semaphore - end - - def self.start_refill_task - @refill_task = Concurrent::TimerTask.new(execution_interval: config[:refill][:interval], run_now: true, timeout_interval: config[:refill][:timeout]) { refill } - @refill_task.execute + JSON.parse(Faraday.get(config[:location] + "/docker_container_pool/dump_info").body) end end From 380b2ce6c7b3822bbae717d949150e9e55f25b0d Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Wed, 25 Mar 2020 11:47:59 +0100 Subject: [PATCH 5/5] Enhance kill for container --- lib/docker_client.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/docker_client.rb b/lib/docker_client.rb index 819cfa14..769f4a82 100644 --- a/lib/docker_client.rb +++ b/lib/docker_client.rb @@ -230,12 +230,12 @@ class DockerClient @socket.close end Rails.logger.info('destroying container ' + container.to_s) - container.stop.kill - container.port_bindings.values.each { |port| PortPool.release(port) } - clean_container_workspace(container) # Checks only if container assignment is not nil and not whether the container itself is still present. if container && !DockerContainerPool.config[:active] + clean_container_workspace(container) + container.stop.kill + container.port_bindings.values.each { |port| PortPool.release(port) } container.delete(force: true, v: true) elsif container DockerContainerPool.destroy_container(container)