# Origin: commit 8b5a05ba06a285a4ea0accc2cf64e2f87ca4e1eb by Maximilian Grundke
# (Wed, 22 Nov 2017 17:40:14 +0100), "Detect exercises with too high or too low working time average".
# File: lib/tasks/detect_exercise_anomalies.rake

# Pure helpers for the detect_exercise_anomalies rake tasks. They live in a module so the
# constants are defined once at load time (not every time the task body runs) and so the
# calculations do not depend on the database.
module ExerciseAnomalyDetection
  # These factors determine whether an exercise is an anomaly, given the average working time (avg)
  # of its collection. An exercise is considered normal when:
  #   (avg * MIN_TIME_FACTOR) <= working_time <= (avg * MAX_TIME_FACTOR)
  MIN_TIME_FACTOR = 0.1
  MAX_TIME_FACTOR = 2

  module_function

  # Converts a rake task argument into a non-negative Integer.
  #
  # @param value [String, Integer, nil] the raw task argument
  # @param name [String, Symbol] the argument name, used in the error message
  # @return [Integer]
  # @raise [ArgumentError] if the argument is missing, not a base-10 integer, or negative
  def parse_count(value, name)
    count = begin
      Integer(value.to_s, 10)
    rescue ArgumentError
      nil
    end
    return count if count && count >= 0

    raise ArgumentError, "#{name} must be a non-negative integer, got #{value.inspect}"
  end

  # Converts a colon-separated duration ("hours:minutes:seconds", seconds may carry a fraction)
  # into seconds.
  #
  # @param duration [String]
  # @return [Float]
  def working_time_to_seconds(duration)
    hours, minutes, seconds = duration.split(':')
    hours.to_i * 60 * 60 + minutes.to_i * 60 + seconds.to_f
  end

  # Selects the entries whose working time is above MAX_TIME_FACTOR times, or below
  # MIN_TIME_FACTOR times, the average of all given working times.
  #
  # @param working_times [Hash{Object => Numeric}] exercise id => working time in seconds
  # @return [Hash{Object => Numeric}] the anomalous subset; empty if no working times are given
  def anomalies(working_times)
    # Without any value there is no average to compare against.
    return {} if working_times.empty?

    average = working_times.values.sum.to_f / working_times.size
    working_times.select do |_exercise_id, working_time|
      working_time > average * MAX_TIME_FACTOR || working_time < average * MIN_TIME_FACTOR
    end
  end
end

namespace :detect_exercise_anomalies do
  # Usage: rake detect_exercise_anomalies:with_at_least[number_of_exercises,number_of_solutions]
  #
  # Prints every exercise collection that is flagged for anomaly detection and has at least
  # `number_of_exercises` exercises with at least `number_of_solutions` submissions each, followed
  # by the collection's exercise titles and a hash (exercise id => average working time in seconds)
  # of the exercises whose average working time is an anomaly within the collection.
  task :with_at_least, [:number_of_exercises, :number_of_solutions] => :environment do |_task, args|
    number_of_exercises = ExerciseAnomalyDetection.parse_count(args.number_of_exercises, :number_of_exercises)
    number_of_solutions = ExerciseAnomalyDetection.parse_count(args.number_of_solutions, :number_of_solutions)

    # number_of_solutions is a validated Integer at this point, so interpolating it into the
    # SQL fragment is safe. ">=" implements the "at least" semantics promised by the task name.
    submissions_join = <<~SQL
      join exercise_collections_exercises ece on exercise_collections.id = ece.exercise_collection_id
      join
        (select e.id
         from exercises e
         join submissions s on s.exercise_id = e.id
         group by e.id
         having count(s.id) >= #{number_of_solutions}
        ) as exercises_with_submissions on exercises_with_submissions.id = ece.exercise_id
    SQL

    # All exercise collections that are flagged for anomaly detection AND contain at least the
    # specified number of exercises which in turn have at least the specified number of submissions.
    collections = ExerciseCollection
                  .where(use_anomaly_detection: true)
                  .joins(submissions_join)
                  .group('exercise_collections.id')
                  .having('count(exercises_with_submissions.id) >= ?', number_of_exercises)

    collections.each do |collection|
      puts "\t- #{collection}"
      working_times = {}
      collection.exercises.each do |exercise|
        puts "\t\t> #{exercise.title}"
        average_working_time = exercise.average_working_time
        # NOTE(review): average_working_time is defined elsewhere; presumably it is nil for an
        # exercise without submissions - confirm. Such exercises are left out of the average.
        next if average_working_time.nil?

        working_times[exercise.id] = ExerciseAnomalyDetection.working_time_to_seconds(average_working_time)
      end
      puts ExerciseAnomalyDetection.anomalies(working_times)
    end
  end
end