Completion Prediction Queries
Student Completion Prediction by Years in College and Awards
This query calculates the average Students completion prediction scores by years in college and awards. It categorizes the completion likelihood into five levels: Very High, High, Moderate, Low, and Very Low.
You can query all students who have been in college for given years or apply more specific filters for completion window length, completion likelihood and specific awards.
-- Find the terms a student has an active enrollment in for each of their careers (academic levels).
WITH student_enrollment AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id AS civ_career
, ds.civ_term_id
, de.civ_is_enrolled
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de USING (dim_enrollment_id)
INNER JOIN data_mart.dim_section ds USING (dim_section_id)
WHERE
de.dim_enrollment_id != 'null_row'
AND ds.dim_section_id != 'null_row'
AND de.civ_is_enrolled = 1
)
, student_start_term AS (
SELECT
dp.raw_person_id
, dct.civ_career_id AS civ_career
, dpl.raw_award_id
, dpl.raw_award_desc
, dpl.raw_award_type_id
, MIN(dct.civ_career_term_start_dt) AS start_date
, FLOOR(DATEDIFF(month, fsst.event_ts, CURRENT_DATE) / 12) AS years_in_college
FROM data_mart.fact_student_start_term fsst
INNER JOIN data_mart.dim_person dp USING (dim_person_id)
INNER JOIN data_mart.fact_plan_enrollment dpe ON
fsst.dim_person_id = dpe.dim_person_id
AND fsst.institution_id = dpe.institution_id
LEFT JOIN data_mart.dim_plan dpl ON
dpe.dim_plan_id = dpl.dim_plan_id
INNER JOIN data_mart.dim_career_term dct ON
fsst.civ_start_term_id = dct.civ_term_id
GROUP BY
dp.raw_person_id
, fsst.event_ts
, dpl.raw_award_id
, dct.civ_career_id
, dpl.raw_award_desc
, dpl.raw_award_type_id
)
, completion_score_base AS (
SELECT
fcsl.event_ts AS score_date
, fcsl.civ_term_id AS score_term
, fcsl.civ_career
, dp.institution_person_id
, sst.raw_award_id
, sst.start_date AS start_term_timestamp
, sst.years_in_college
, fcsl.prediction
, fcsl.window_length
, COALESCE(se.civ_is_enrolled, 0) AS civ_is_enrolled
FROM data_mart.fact_completion_scores fcsl
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_completion_window dcw USING (dim_completion_window_id, institution_id, civ_career)
LEFT JOIN student_enrollment se ON
dp.raw_person_id = se.raw_person_id
AND fcsl.civ_career = se.civ_career
AND fcsl.civ_term_id = se.civ_term_id
LEFT JOIN student_start_term sst ON
dp.raw_person_id = sst.raw_person_id
WHERE is_active = 1 -- Students from active term only
AND civ_is_enrolled = 1 -- Only enrolled students
)
, completion_score AS (
-- Aggregate the prediction score and classify the completion likelihood
SELECT
institution_person_id
, civ_career
, raw_award_id
, civ_is_enrolled
, start_term_timestamp
, years_in_college
, CASE
WHEN AVG(prediction) >= 0.90 THEN 'Very High'
WHEN AVG(prediction) >= 0.70 THEN 'High'
WHEN AVG(prediction) >= 0.50 THEN 'Moderate'
WHEN AVG(prediction) >= 0.20 THEN 'Low'
ELSE 'Very Low'
END AS completion_likelihood
, AVG(prediction) AS avg_student_completion_prediction_score
, MIN(prediction) AS min_student_completion_prediction_score
, MAX(prediction) AS max_student_completion_prediction_score
-- This will classify the completion window length based on the maximum window length
, window_length
, DENSE_RANK() OVER (ORDER BY window_length DESC) AS completion_window_length_rank
FROM completion_score_base
GROUP BY
institution_person_id
, raw_award_id
, civ_career
, civ_is_enrolled
, start_term_timestamp
, years_in_college
, window_length
)
SELECT
institution_person_id
, civ_career
, start_term_timestamp
, raw_award_id
, years_in_college
, completion_likelihood
, avg_student_completion_prediction_score
, min_student_completion_prediction_score
, max_student_completion_prediction_score
, window_length
, CASE
WHEN completion_window_length_rank = 1 THEN 'Long'
WHEN completion_window_length_rank = 2 THEN 'Normal'
WHEN completion_window_length_rank = 3 THEN 'Short'
ELSE 'Unknown'
END AS completion_window_length
FROM completion_score
WHERE completion_window_length IN ('Long') -- You can adjust or add additional completion window lengths
AND years_in_college IN (4, 5, 6) -- Years in college are adjustable as needed
-- AND raw_award_id = '<RAW_AWARD_ID>'
-- AND completion_likelihood = '<COMPLETION_LIKELIHOOD_CATEGORY>' -- Example: 'Low'
ORDER BY
institution_person_id
, avg_student_completion_prediction_score DESC;
Student Completion Predictions
Specifying '<STUDENT_ID>' returns the latest 4-year completion prediction for the student. Enrollment flag and actual completion outcome can also be obtained by joining with other tables in the data warehouse.
-- Find the terms a student has an active enrollment in for each of their careers (academic levels).
WITH student_enrollment AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id AS civ_career
, ds.civ_term_id
, de.civ_is_enrolled
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de USING (dim_enrollment_id)
INNER JOIN data_mart.dim_section ds USING (dim_section_id)
WHERE
de.dim_enrollment_id != 'null_row'
AND ds.dim_section_id != 'null_row'
AND de.civ_is_enrolled = 1
)
-- Find the earliest term the student has graduated in for each of their careers (academic levels).
, earliest_completion_term AS (
SELECT
dp.raw_person_id
, dct.civ_career_id AS civ_career
, MIN(dct.civ_career_term_id) AS completion_career_term_id
FROM data_mart.fact_graduation fg
INNER JOIN data_mart.dim_person dp USING (dim_person_id)
INNER JOIN data_mart.dim_career_term dct USING (dim_career_term_id)
WHERE
fg.civ_is_award_conferred = 1
GROUP BY dp.raw_person_id, dct.civ_career_id
)
SELECT
fcsl.event_ts AS score_date
, fcsl.civ_term_id AS score_term
, fcsl.civ_career
, dp.institution_person_id
, fcsl.prediction
, COALESCE(se.civ_is_enrolled, 0) AS civ_is_enrolled
, CASE
WHEN ect.completion_career_term_id <= dcw.civ_end_term_sequence_no
THEN 1
ELSE 0
END AS civ_is_completed
FROM data_mart.fact_completion_scores_latest fcsl
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_completion_window dcw USING (dim_completion_window_id, institution_id, civ_career)
LEFT JOIN student_enrollment se USING (raw_person_id, civ_career, civ_term_id)
LEFT JOIN earliest_completion_term ect USING (raw_person_id, civ_career)
WHERE
fcsl.window_length = 4 -- Adjust this for the graduation window you would like to use (4 year, 5 year, etc.). To see your configured windows: SELECT DISTINCT window_length FROM data_mart.dim_completion_window;
AND dp.institution_person_id = '<STUDENT_ID>';