WITH student_enrollment AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id AS civ_career
, ds.civ_term_id
, de.civ_is_enrolled
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de USING (dim_enrollment_id)
INNER JOIN data_mart.dim_section ds USING (dim_section_id)
WHERE
de.dim_enrollment_id != 'null_row'
AND ds.dim_section_id != 'null_row'
AND de.civ_is_enrolled = 1
)
, student_start_term AS (
SELECT
dp.raw_person_id
, dct.civ_career_id AS civ_career
, dpl.raw_award_id
, dpl.raw_award_desc
, dpl.raw_award_type_id
, MIN(dct.civ_career_term_start_dt) AS start_date
, FLOOR(DATEDIFF(month, fsst.event_ts, CURRENT_DATE) / 12) AS years_in_college
FROM data_mart.fact_student_start_term fsst
INNER JOIN data_mart.dim_person dp USING (dim_person_id)
INNER JOIN data_mart.fact_plan_enrollment dpe ON
fsst.dim_person_id = dpe.dim_person_id
AND fsst.institution_id = dpe.institution_id
LEFT JOIN data_mart.dim_plan dpl ON
dpe.dim_plan_id = dpl.dim_plan_id
INNER JOIN data_mart.dim_career_term dct ON
fsst.civ_start_term_id = dct.civ_term_id
GROUP BY
dp.raw_person_id
, fsst.event_ts
, dpl.raw_award_id
, dct.civ_career_id
, dpl.raw_award_desc
, dpl.raw_award_type_id
)
, completion_score_base AS (
SELECT
fcsl.event_ts AS score_date
, fcsl.civ_term_id AS score_term
, fcsl.civ_career
, dp.institution_person_id
, sst.raw_award_id
, sst.start_date AS start_term_timestamp
, sst.years_in_college
, fcsl.prediction
, fcsl.window_length
, COALESCE(se.civ_is_enrolled, 0) AS civ_is_enrolled
FROM data_mart.fact_completion_scores fcsl
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_completion_window dcw USING (dim_completion_window_id, institution_id, civ_career)
LEFT JOIN student_enrollment se ON
dp.raw_person_id = se.raw_person_id
AND fcsl.civ_career = se.civ_career
AND fcsl.civ_term_id = se.civ_term_id
LEFT JOIN student_start_term sst ON
dp.raw_person_id = sst.raw_person_id
WHERE is_active = 1
AND civ_is_enrolled = 1
)
, completion_score AS (
SELECT
institution_person_id
, civ_career
, raw_award_id
, civ_is_enrolled
, start_term_timestamp
, years_in_college
, CASE
WHEN AVG(prediction) >= 0.90 THEN 'Very High'
WHEN AVG(prediction) >= 0.70 THEN 'High'
WHEN AVG(prediction) >= 0.50 THEN 'Moderate'
WHEN AVG(prediction) >= 0.20 THEN 'Low'
ELSE 'Very Low'
END AS completion_likelihood
, AVG(prediction) AS avg_student_completion_prediction_score
, MIN(prediction) AS min_student_completion_prediction_score
, MAX(prediction) AS max_student_completion_prediction_score
, window_length
, DENSE_RANK() OVER (ORDER BY window_length DESC) AS completion_window_length_rank
FROM completion_score_base
GROUP BY
institution_person_id
, raw_award_id
, civ_career
, civ_is_enrolled
, start_term_timestamp
, years_in_college
, window_length
)
SELECT
institution_person_id
, civ_career
, start_term_timestamp
, raw_award_id
, years_in_college
, completion_likelihood
, avg_student_completion_prediction_score
, min_student_completion_prediction_score
, max_student_completion_prediction_score
, window_length
, CASE
WHEN completion_window_length_rank = 1 THEN 'Long'
WHEN completion_window_length_rank = 2 THEN 'Normal'
WHEN completion_window_length_rank = 3 THEN 'Short'
ELSE 'Unknown'
END AS completion_window_length
FROM completion_score
WHERE completion_window_length IN ('Long')
AND years_in_college IN (4, 5, 6)
ORDER BY
institution_person_id
, avg_student_completion_prediction_score DESC;