improved quantile calculation by using only times until user has reached max points
This commit is contained in:
@ -82,23 +82,111 @@ class Exercise < ActiveRecord::Base
|
||||
def get_quantiles(quantiles)
|
||||
quantiles_str = "[" + quantiles.join(",") + "]"
|
||||
result = self.class.connection.execute("""
|
||||
SELECT unnest(PERCENTILE_CONT(ARRAY#{quantiles_str}) WITHIN GROUP (ORDER BY working_time))
|
||||
FROM
|
||||
(
|
||||
SELECT user_id,
|
||||
sum(working_time_new) AS working_time
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
CASE WHEN working_time >= '0:30:00' THEN '0' ELSE working_time END AS working_time_new
|
||||
FROM
|
||||
(SELECT user_id,
|
||||
id,
|
||||
(created_at - lag(created_at) OVER (PARTITION BY user_id, exercise_id
|
||||
ORDER BY created_at)) AS working_time
|
||||
FROM submissions
|
||||
WHERE exercise_id=#{self.id} AND user_type = 'ExternalUser') AS foo) AS bar
|
||||
GROUP BY user_id
|
||||
) AS foo
|
||||
WITH working_time AS
|
||||
(
|
||||
SELECT user_id,
|
||||
id,
|
||||
exercise_id,
|
||||
Max(score) AS max_score,
|
||||
(created_at - Lag(created_at) OVER (partition BY user_id, exercise_id ORDER BY created_at)) AS working_time
|
||||
FROM submissions
|
||||
WHERE exercise_id = #{id}
|
||||
AND user_type = 'ExternalUser'
|
||||
GROUP BY user_id,
|
||||
id,
|
||||
exercise_id), max_points AS
|
||||
(
|
||||
SELECT context_id AS ex_id,
|
||||
Sum(weight) AS max_points
|
||||
FROM files
|
||||
WHERE context_type = 'Exercise'
|
||||
AND context_id = #{id}
|
||||
AND role = 'teacher_defined_test'
|
||||
GROUP BY context_id),
|
||||
-- filter for rows containing max points
|
||||
time_max_score AS
|
||||
(
|
||||
SELECT *
|
||||
FROM working_time W1,
|
||||
max_points MS
|
||||
WHERE w1.exercise_id = ex_id
|
||||
AND w1.max_score = ms.max_points),
|
||||
-- find row containing the first time max points
|
||||
first_time_max_score AS
|
||||
(
|
||||
SELECT id,
|
||||
user_id,
|
||||
exercise_id,
|
||||
max_score,
|
||||
working_time,
|
||||
rn
|
||||
FROM (
|
||||
SELECT id,
|
||||
user_id,
|
||||
exercise_id,
|
||||
max_score,
|
||||
working_time,
|
||||
Row_number() OVER(partition BY user_id, exercise_id ORDER BY id ASC) AS rn
|
||||
FROM time_max_score) T
|
||||
WHERE rn = 1), times_until_max_points AS
|
||||
(
|
||||
SELECT w.id,
|
||||
w.user_id,
|
||||
w.exercise_id,
|
||||
w.max_score,
|
||||
w.working_time,
|
||||
m.id AS reachedmax_at
|
||||
FROM working_time W,
|
||||
first_time_max_score M
|
||||
WHERE w.user_id = m.user_id
|
||||
AND w.exercise_id = m.exercise_id
|
||||
AND w.id <= m.id),
|
||||
-- if user never makes it to max points, take all times
|
||||
all_working_times_until_max AS (
|
||||
(
|
||||
SELECT id,
|
||||
user_id,
|
||||
exercise_id,
|
||||
max_score,
|
||||
working_time
|
||||
FROM times_until_max_points)
|
||||
UNION ALL
|
||||
(
|
||||
SELECT id,
|
||||
user_id,
|
||||
exercise_id,
|
||||
max_score,
|
||||
working_time
|
||||
FROM working_time W1
|
||||
WHERE NOT EXISTS
|
||||
(
|
||||
SELECT 1
|
||||
FROM first_time_max_score F
|
||||
WHERE f.user_id = w1.user_id
|
||||
AND f.exercise_id = w1.exercise_id))), filtered_times_until_max AS
|
||||
(
|
||||
SELECT user_id,
|
||||
exercise_id,
|
||||
max_score,
|
||||
CASE
|
||||
WHEN working_time >= '0:30:00' THEN '0'
|
||||
ELSE working_time
|
||||
END AS working_time_new
|
||||
FROM all_working_times_until_max ), result AS
|
||||
(
|
||||
SELECT e.external_id AS external_user_id,
|
||||
f.user_id,
|
||||
exercise_id,
|
||||
Max(max_score) AS max_score,
|
||||
Sum(working_time_new) AS working_time
|
||||
FROM filtered_times_until_max f,
|
||||
external_users e
|
||||
WHERE f.user_id = e.id
|
||||
GROUP BY e.external_id,
|
||||
f.user_id,
|
||||
exercise_id )
|
||||
SELECT unnest(percentile_cont(array#{quantiles_str}) within GROUP (ORDER BY working_time))
|
||||
FROM result
|
||||
""")
|
||||
if result.count > 0
|
||||
quantiles.each_with_index.map{|q,i| Time.parse(result[i]["unnest"]).seconds_since_midnight}
|
||||
|
Reference in New Issue
Block a user