improved quantile calculation by using only times until user has reached max points

This commit is contained in:
Thomas Hille
2017-03-28 12:29:36 +02:00
parent b9e93a5b21
commit c8609ffa81

View File

@ -82,23 +82,111 @@ class Exercise < ActiveRecord::Base
def get_quantiles(quantiles)
quantiles_str = "[" + quantiles.join(",") + "]"
result = self.class.connection.execute("""
SELECT unnest(PERCENTILE_CONT(ARRAY#{quantiles_str}) WITHIN GROUP (ORDER BY working_time))
FROM
WITH working_time AS
(
SELECT user_id,
sum(working_time_new) AS working_time
FROM
(SELECT user_id,
CASE WHEN working_time >= '0:30:00' THEN '0' ELSE working_time END AS working_time_new
FROM
(SELECT user_id,
id,
(created_at - lag(created_at) OVER (PARTITION BY user_id, exercise_id
ORDER BY created_at)) AS working_time
exercise_id,
Max(score) AS max_score,
(created_at - Lag(created_at) OVER (partition BY user_id, exercise_id ORDER BY created_at)) AS working_time
FROM submissions
WHERE exercise_id=#{self.id} AND user_type = 'ExternalUser') AS foo) AS bar
GROUP BY user_id
) AS foo
WHERE exercise_id = #{id}
AND user_type = 'ExternalUser'
GROUP BY user_id,
id,
exercise_id), max_points AS
(
SELECT context_id AS ex_id,
Sum(weight) AS max_points
FROM files
WHERE context_type = 'Exercise'
AND context_id = #{id}
AND role = 'teacher_defined_test'
GROUP BY context_id),
-- filter for rows containing max points
time_max_score AS
(
SELECT *
FROM working_time W1,
max_points MS
WHERE w1.exercise_id = ex_id
AND w1.max_score = ms.max_points),
-- find row containing the first time max points
first_time_max_score AS
(
SELECT id,
user_id,
exercise_id,
max_score,
working_time,
rn
FROM (
SELECT id,
user_id,
exercise_id,
max_score,
working_time,
Row_number() OVER(partition BY user_id, exercise_id ORDER BY id ASC) AS rn
FROM time_max_score) T
WHERE rn = 1), times_until_max_points AS
(
SELECT w.id,
w.user_id,
w.exercise_id,
w.max_score,
w.working_time,
m.id AS reachedmax_at
FROM working_time W,
first_time_max_score M
WHERE w.user_id = m.user_id
AND w.exercise_id = m.exercise_id
AND w.id <= m.id),
-- if user never makes it to max points, take all times
all_working_times_until_max AS (
(
SELECT id,
user_id,
exercise_id,
max_score,
working_time
FROM times_until_max_points)
UNION ALL
(
SELECT id,
user_id,
exercise_id,
max_score,
working_time
FROM working_time W1
WHERE NOT EXISTS
(
SELECT 1
FROM first_time_max_score F
WHERE f.user_id = w1.user_id
AND f.exercise_id = w1.exercise_id))), filtered_times_until_max AS
(
SELECT user_id,
exercise_id,
max_score,
CASE
WHEN working_time >= '0:30:00' THEN '0'
ELSE working_time
END AS working_time_new
FROM all_working_times_until_max ), result AS
(
SELECT e.external_id AS external_user_id,
f.user_id,
exercise_id,
Max(max_score) AS max_score,
Sum(working_time_new) AS working_time
FROM filtered_times_until_max f,
external_users e
WHERE f.user_id = e.id
GROUP BY e.external_id,
f.user_id,
exercise_id )
SELECT unnest(percentile_cont(array#{quantiles_str}) within GROUP (ORDER BY working_time))
FROM result
""")
if result.count > 0
quantiles.each_with_index.map{|q,i| Time.parse(result[i]["unnest"]).seconds_since_midnight}