From d9d77fbd97bf606ad1cfa33e6df30d3d90fa39bc Mon Sep 17 00:00:00 2001 From: Sebastian Serth Date: Mon, 23 Mar 2020 13:38:09 +0100 Subject: [PATCH] Various important bug fixes and self destroy for containers --- lib/docker_client.rb | 41 +++++++++++++++++++++++++++++++---- lib/docker_container_mixin.rb | 1 + lib/docker_container_pool.rb | 18 ++++++++++----- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/lib/docker_client.rb b/lib/docker_client.rb index b9367360..14f5af51 100644 --- a/lib/docker_client.rb +++ b/lib/docker_client.rb @@ -9,6 +9,9 @@ class DockerClient MINIMUM_MEMORY_LIMIT = 4 RECYCLE_CONTAINERS = true RETRY_COUNT = 2 + MINIMUM_CONTAINER_LIFETIME = 10.minutes + MAXIMUM_CONTAINER_LIFETIME = 20.minutes + SELF_DESTROY_GRACE_PERIOD = 2.minutes attr_reader :container attr_reader :socket @@ -116,6 +119,34 @@ class DockerClient container.start(container_start_options(execution_environment, local_workspace_path)) container.start_time = Time.now container.status = :created + container.re_use = true + + # TODO: + # - gerade benutzt? 15 Min -> Status setzen und prüfen (return beachten) -> 2 hart killen + # - schon weg? + Thread.new do + timeout = Random.rand(MINIMUM_CONTAINER_LIFETIME..MAXIMUM_CONTAINER_LIFETIME) # seconds + sleep(timeout) + container.re_use = false + if container.status != :executing + new(execution_environment: execution_environment).kill_container(container, false) + Rails.logger.info('Killing container in status ' + container.status + ' after ' + (Time.now - container.start_time).to_s + ' seconds.') + else + Thread.new do + timeout = SELF_DESTROY_GRACE_PERIOD.to_i + sleep(timeout) + new(execution_environment: execution_environment).kill_container(container, false) + Rails.logger.info('Force killing container in status ' + container.status + ' after ' + Time.now - container.start_time + ' seconds.') + ensure + # guarantee that the thread is releasing the DB connection after it is done + ActiveRecord::Base.connection_pool.release_connection + end + end + ensure + # guarantee that the thread is releasing the DB connection after it is done + ActiveRecord::Base.connection_pool.release_connection + end + container rescue Docker::Error::NotFoundError => error Rails.logger.error('create_container: Got Docker::Error::NotFoundError: ' + error.to_s) @@ -307,7 +338,7 @@ class DockerClient (DockerContainerPool.config[:active] && RECYCLE_CONTAINERS) ? self.class.return_container(container, @execution_environment) : self.class.destroy_container(container) end - def kill_container(container) + def kill_container(container, create_new = true) exit_thread_if_alive Rails.logger.info('killing container ' + container.to_s) # remove container from pool, then destroy it @@ -317,13 +348,15 @@ class DockerClient # create new container and add it to @all_containers and @containers. # ToDo: How long does creating a new cotainer take? We're still locking the semaphore. - missing_counter_count = execution_environment.pool_size - @all_containers[execution_environment.id].length - if missing_counter_count > 0 + missing_counter_count = @execution_environment.pool_size - DockerContainerPool.all_containers[@execution_environment.id].length + if missing_counter_count > 0 && create_new Rails.logger.error('kill_container: Creating a new container.') new_container = self.class.create_container(@execution_environment) DockerContainerPool.add_to_all_containers(new_container, @execution_environment) + elsif !create_new + Rails.logger.error('Container killed and removed for ' + @execution_environment.to_s + ' but not creating a new one. Currently, ' + missing_counter_count.abs.to_s + ' more containers than the configured pool size are available.') else - Rails.logger.error('Container killed and removed for ' + execution_environment.to_s + ' but not creating a new one. Currently, ' + missing_counter_count.abs + ' more containers than the configured pool size are available.') + Rails.logger.error('Container killed and removed for ' + @execution_environment.to_s + ' but not creating a new one as per request. Currently, ' + missing_counter_count.to_s + ' containers are missing compared to the configured pool size are available. Negative number means they are too much containers') end DockerContainerPool.release_semaphore end diff --git a/lib/docker_container_mixin.rb b/lib/docker_container_mixin.rb index fde9fe5b..d5881b38 100644 --- a/lib/docker_container_mixin.rb +++ b/lib/docker_container_mixin.rb @@ -2,6 +2,7 @@ module DockerContainerMixin attr_accessor :start_time attr_accessor :status + attr_accessor :re_use def binds json['HostConfig']['Binds'] diff --git a/lib/docker_container_pool.rb b/lib/docker_container_pool.rb index 73eacd88..b6855971 100644 --- a/lib/docker_container_pool.rb +++ b/lib/docker_container_pool.rb @@ -88,7 +88,7 @@ class DockerContainerPool def self.return_container(container, execution_environment) container.status = 'available' # FIXME: String vs Symbol usage? - if @containers[execution_environment.id] && !@containers[execution_environment.id].include?(container) + if @containers[execution_environment.id] && !@containers[execution_environment.id].include?(container) && container.re_use @containers[execution_environment.id].push(container) else Rails.logger.error('trying to return existing container ' + container.to_s + ' to execution_environment ' + execution_environment.to_s) @@ -101,11 +101,11 @@ class DockerContainerPool container = @containers[execution_environment.id].try(:shift) || nil Rails.logger.debug('get_container fetched container ' + container.to_s + ' for execution environment ' + execution_environment.to_s) # just access and the following if we got a container. Otherwise, the execution_environment might be just created and not fully exist yet. - if(container) + if (container) begin # check whether the container is running. exited containers go to the else part. # Dead containers raise a NotFOundError on the container.json call. This is handled in the rescue block. - if(container.json['State']['Running']) + if (container.json['State']['Running']) Rails.logger.debug('get_container remaining avail. containers: ' + @containers[execution_environment.id].size.to_s) Rails.logger.debug('get_container all container count: ' + @all_containers[execution_environment.id].size.to_s) else @@ -135,7 +135,7 @@ class DockerContainerPool new_container = create_container(execution_environment) DockerContainerPool.add_to_all_containers(new_container, execution_environment) else - Rails.logger.error('Broken container removed for ' + execution_environment.to_s + ' but not creating a new one. Currently, ' + missing_counter_count.abs + ' more containers than the configured pool size are available.') + Rails.logger.error('Broken container removed for ' + execution_environment.to_s + ' but not creating a new one. Currently, ' + missing_counter_count.abs.to_s + ' more containers than the configured pool size are available.') new_container = get_container(execution_environment, bypass_semaphore: true) end release_semaphore unless bypass_semaphore @@ -146,6 +146,14 @@ class DockerContainerPool @containers.map { |key, value| [key, value.length] }.to_h end + def self.dump_info + { + process: $$, + containers: @containers.as_json, + all_containers: @all_containers.as_json + } + end + def self.refill ExecutionEnvironment.where('pool_size > 0').order(pool_size: :desc).each do |execution_environment| if config[:refill][:async] @@ -160,7 +168,7 @@ class DockerContainerPool acquire_semaphore refill_count = [execution_environment.pool_size - @all_containers[execution_environment.id].length, config[:refill][:batch_size]].min if refill_count > 0 - Rails.logger.info('Adding ' + refill_count.to_s + ' containers for execution_environment ' + execution_environment.name ) + Rails.logger.info('Adding ' + refill_count.to_s + ' containers for execution_environment ' + execution_environment.name) multiple_containers = refill_count.times.map { create_container(execution_environment) } #Rails.logger.info('Created containers: ' + multiple_containers.to_s ) @containers[execution_environment.id].concat(multiple_containers)