Refactor anomaly detection task

This commit is contained in:
Maximilian Grundke
2018-02-04 15:14:07 +01:00
parent 3ee993e965
commit 509335a1af

View File

@ -1,19 +1,36 @@
namespace :detect_exercise_anomalies do
task :with_at_least, [:number_of_exercises, :number_of_solutions] => :environment do |task, args|
number_of_exercises = args.number_of_exercises
number_of_solutions = args.number_of_solutions
puts "\tSearching for exercise collections with at least #{number_of_exercises} exercises and #{number_of_solutions} users."
# These factors bound the normal working time of an exercise relative to the
# average working time (avg) of its collection. An exercise is considered normal while
# (avg * MIN_TIME_FACTOR) <= working_time <= (avg * MAX_TIME_FACTOR)
# and is flagged as an anomaly as soon as its working time falls outside that range.
MIN_TIME_FACTOR = 0.1
MAX_TIME_FACTOR = 2
# Rake task: looks up exercise collections flagged for anomaly detection, searches
# each one for exercises with anomalous working times and, when anomalies are found
# and the collection has an author, notifies the author and users and then clears
# the collection's anomaly detection flag.
#
# Task arguments:
#   number_of_exercises - threshold forwarded to get_collections
#   number_of_solutions - threshold forwarded to get_collections
task :with_at_least, [:number_of_exercises, :number_of_solutions] => :environment do |_task, args|
  number_of_exercises = args[:number_of_exercises]
  number_of_solutions = args[:number_of_solutions]
  puts "Searching for exercise collections with at least #{number_of_exercises} exercises and #{number_of_solutions} users."

  # Get all exercise collections that have at least the specified amount of exercises and at least the specified
  # number of submissions AND are flagged for anomaly detection
  # NOTE(review): get_collections filters with `count(...) > ?`, i.e. strictly more than
  # number_of_exercises, which does not match "at least" - confirm which one is intended.
  collections = get_collections(number_of_exercises, number_of_solutions)
  puts "Found #{collections.length}."

  collections.each do |collection|
    puts "\t- #{collection}"
    anomalies = find_anomalies(collection)

    # Nothing to report, or nobody to report it to: leave the collection flagged.
    next if anomalies.empty? || collection.user.nil?

    puts "\t\tAnomalies: #{anomalies}\n"
    notify_collection_author(collection, anomalies)
    notify_users(collection, anomalies)
    reset_anomaly_detection_flag(collection)
  end

  puts 'Done.'
end
def get_collections(number_of_exercises, number_of_solutions)
ExerciseCollection
.where(:use_anomaly_detection => true)
.joins("join exercise_collections_exercises ece on exercise_collections.id = ece.exercise_collection_id
join
@ -25,11 +42,9 @@ namespace :detect_exercise_anomalies do
) as exercises_with_submissions on exercises_with_submissions.id = ece.exercise_id")
.group('exercise_collections.id')
.having('count(exercises_with_submissions.id) > ?', number_of_exercises)
end
puts "\tFound #{collections.length}."
collections.each do |collection|
puts "\t- #{collection}"
def find_anomalies(collection)
working_times = {}
collection.exercises.each do |exercise|
puts "\t\t> #{exercise.title}"
@ -38,16 +53,17 @@ namespace :detect_exercise_anomalies do
working_times[exercise.id] = seconds
end
average = working_times.values.reduce(:+) / working_times.size
anomalies = working_times.select do |exercise_id, working_time|
working_times.select do |exercise_id, working_time|
working_time > average * MAX_TIME_FACTOR or working_time < average * MIN_TIME_FACTOR
end
end
if anomalies.length > 0 and not collection.user.nil?
puts "\t\tAnomalies: #{anomalies}\n"
# Logs who is being contacted and immediately delivers the anomaly report mail
# for the given collection.
#
# collection - object whose #user responds to #displayname and #email
# anomalies  - anomaly data handed unchanged to UserMailer.exercise_anomaly_detected
def notify_collection_author(collection, anomalies)
  author = collection.user
  puts "\t\tSending E-Mail to author (#{author.displayname} <#{author.email}>)..."
  mail = UserMailer.exercise_anomaly_detected(collection, anomalies)
  mail.deliver_now
end
def notify_users(collection, anomalies)
puts "\t\tSending E-Mails to best and worst performing users of each anomaly..."
anomalies.each do |exercise_id, average_working_time|
submissions = Submission.find_by_sql(['
@ -73,13 +89,12 @@ namespace :detect_exercise_anomalies do
puts "\t\t\tbest performers: #{best_performers}"
puts "\t\t\tworst performers: #{worst_performers}"
end
end
# Switches anomaly detection off for the given collection and persists the change.
# Uses save!, so a failed save raises instead of being ignored.
#
# collection - object exposing #use_anomaly_detection= and #save!
def reset_anomaly_detection_flag(collection)
  puts "\t\tResetting flag..."
  collection.tap { |flagged| flagged.use_anomaly_detection = false }.save!
end
end
puts "\tDone."
end
end