Completion Prediction Queries

Student Completion Prediction by Years in College and Awards

This query calculates the average Students completion prediction scores by years in college and awards. It categorizes the completion likelihood into five levels: Very High, High, Moderate, Low, and Very Low.

You can query all students who have been in college for given years or apply more specific filters for completion window length, completion likelihood and specific awards.

-- Find the terms a student has an active enrollment in for each of their careers (academic levels).
WITH student_enrollment AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id AS civ_career
, ds.civ_term_id
, de.civ_is_enrolled
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de USING (dim_enrollment_id)
INNER JOIN data_mart.dim_section ds USING (dim_section_id)
WHERE
de.dim_enrollment_id != 'null_row'
AND ds.dim_section_id != 'null_row'
AND de.civ_is_enrolled = 1
)
, student_start_term AS (
SELECT
dp.raw_person_id
, dct.civ_career_id AS civ_career
, dpl.raw_award_id
, dpl.raw_award_desc
, dpl.raw_award_type_id
, MIN(dct.civ_career_term_start_dt) AS start_date
, FLOOR(DATEDIFF(month, fsst.event_ts, CURRENT_DATE) / 12) AS years_in_college
FROM data_mart.fact_student_start_term fsst
INNER JOIN data_mart.dim_person dp USING (dim_person_id)
INNER JOIN data_mart.fact_plan_enrollment dpe ON
fsst.dim_person_id = dpe.dim_person_id
AND fsst.institution_id = dpe.institution_id
LEFT JOIN data_mart.dim_plan dpl ON
dpe.dim_plan_id = dpl.dim_plan_id
INNER JOIN data_mart.dim_career_term dct ON
fsst.civ_start_term_id = dct.civ_term_id
GROUP BY
dp.raw_person_id
, fsst.event_ts
, dpl.raw_award_id
, dct.civ_career_id
, dpl.raw_award_desc
, dpl.raw_award_type_id
)
, completion_score_base AS (
SELECT
fcsl.event_ts AS score_date
, fcsl.civ_term_id AS score_term
, fcsl.civ_career
, dp.institution_person_id
, sst.raw_award_id
, sst.start_date AS start_term_timestamp
, sst.years_in_college
, fcsl.prediction
, fcsl.window_length
, COALESCE(se.civ_is_enrolled, 0) AS civ_is_enrolled
FROM data_mart.fact_completion_scores fcsl
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_completion_window dcw USING (dim_completion_window_id, institution_id, civ_career)
LEFT JOIN student_enrollment se ON
dp.raw_person_id = se.raw_person_id
AND fcsl.civ_career = se.civ_career
AND fcsl.civ_term_id = se.civ_term_id
LEFT JOIN student_start_term sst ON
dp.raw_person_id = sst.raw_person_id
WHERE is_active = 1 -- Students from active term only
AND civ_is_enrolled = 1 -- Only enrolled students
)
, completion_score AS (
-- Aggregate the prediction score and classify the completion likelihood
SELECT
institution_person_id
, civ_career
, raw_award_id
, civ_is_enrolled
, start_term_timestamp
, years_in_college
, CASE
WHEN AVG(prediction) >= 0.90 THEN 'Very High'
WHEN AVG(prediction) >= 0.70 THEN 'High'
WHEN AVG(prediction) >= 0.50 THEN 'Moderate'
WHEN AVG(prediction) >= 0.20 THEN 'Low'
ELSE 'Very Low'
END AS completion_likelihood
, AVG(prediction) AS avg_student_completion_prediction_score
, MIN(prediction) AS min_student_completion_prediction_score
, MAX(prediction) AS max_student_completion_prediction_score
-- This will classify the completion window length based on the maximum window length
, window_length
, DENSE_RANK() OVER (ORDER BY window_length DESC) AS completion_window_length_rank
FROM completion_score_base
GROUP BY
institution_person_id
, raw_award_id
, civ_career
, civ_is_enrolled
, start_term_timestamp
, years_in_college
, window_length
)
SELECT
institution_person_id
, civ_career
, start_term_timestamp
, raw_award_id
, years_in_college
, completion_likelihood
, avg_student_completion_prediction_score
, min_student_completion_prediction_score
, max_student_completion_prediction_score
, window_length
, CASE
WHEN completion_window_length_rank = 1 THEN 'Long'
WHEN completion_window_length_rank = 2 THEN 'Normal'
WHEN completion_window_length_rank = 3 THEN 'Short'
ELSE 'Unknown'
END AS completion_window_length
FROM completion_score
WHERE completion_window_length IN ('Long') -- You can adjust or add additional completion window lengths
AND years_in_college IN (4, 5, 6) -- Years in college are adjustable as needed
-- AND raw_award_id = '<RAW_AWARD_ID>'
-- AND completion_likelihood = '<COMPLETION_LIKELIHOOD_CATEGORY>' -- Example: 'Low'
ORDER BY
institution_person_id
, avg_student_completion_prediction_score DESC;

Daily Completion Score Shifts

This query retrieves daily completion prediction scores for all students, allowing you to track completion score trends over time. The query aggregates scores by date and provides insights into how completion predictions change daily.

-- Get daily completion scores for all active students
SELECT
dd.dt AS score_date
, dp.institution_person_id
, fcs.civ_career
, dcw.window_length
, MAX(fcs.civ_term_id) AS score_term
, AVG(fcs.prediction) AS daily_avg_completion_score
-- Calculate day-over-day change
, LAG(AVG(fcs.prediction), 1) OVER (
PARTITION BY dp.institution_person_id, fcs.civ_career, dcw.window_length
ORDER BY dd.dt
) AS previous_day_score
, AVG(fcs.prediction) - LAG(AVG(fcs.prediction), 1) OVER (
PARTITION BY dp.institution_person_id, fcs.civ_career, dcw.window_length
ORDER BY dd.dt
) AS score_change_from_previous_day
-- Classify completion likelihood
, CASE
WHEN AVG(fcs.prediction) >= 0.9 THEN 'Very High'
WHEN AVG(fcs.prediction) >= 0.7 THEN 'High'
WHEN AVG(fcs.prediction) >= 0.5 THEN 'Moderate'
WHEN AVG(fcs.prediction) >= 0.2 THEN 'Low'
ELSE 'Very Low'
END AS completion_likelihood
-- Flag significant changes (> 10% change)
, CASE
WHEN ABS(AVG(fcs.prediction) - LAG(AVG(fcs.prediction), 1) OVER (
PARTITION BY dp.institution_person_id, fcs.civ_career, dcw.window_length
ORDER BY dd.dt
)) > 0.1 THEN 'Significant Change'
ELSE 'Stable'
END AS trend_flag
FROM data_mart.fact_completion_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
AND fcs.institution_id = dp.institution_id
INNER JOIN data_mart.dim_date dd ON
fcs.dim_date_id = dd.dim_date_id
INNER JOIN data_mart.dim_completion_window dcw ON
fcs.dim_completion_window_id = dcw.dim_completion_window_id
AND fcs.institution_id = dcw.institution_id
AND fcs.civ_career = dcw.civ_career
WHERE 1 = 1
AND dcw.is_active = 1
AND dd.dt >= CURRENT_DATE - INTERVAL '30 days' -- Last 30 days
-- Optional filters:
-- AND dp.institution_person_id = '<STUDENT_ID>' -- Specific student
-- AND fcs.civ_career = 'U' -- Undergraduate only
-- AND dcw.window_length = 4 -- 4-year completion window
GROUP BY
dd.dt
, dp.institution_person_id
, fcs.civ_career
, dcw.window_length
ORDER BY
dd.dt DESC
, dp.institution_person_id ASC;

Weekly Completion Score Shifts

This query provides weekly aggregated completion scores, useful for identifying longer-term trends and patterns in student completion predictions.

WITH weekly_scores AS (
SELECT
dp.institution_person_id
, fcs.civ_career
, dcw.window_length
, EXTRACT(WEEK FROM dd.dt) AS week_number
, EXTRACT(YEAR FROM dd.dt) AS year_number
, EXTRACT(YEAR FROM dd.dt)::TEXT
|| '-W'
|| LPAD(EXTRACT(WEEK FROM dd.dt)::TEXT, 2, '0') AS week_year
, DATE_TRUNC('week', dd.dt) AS week_start_date
, MAX(fcs.civ_term_id) AS score_term
, AVG(fcs.prediction) AS weekly_avg_completion_score
, COUNT(*) AS score_count
FROM data_mart.fact_completion_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
AND fcs.institution_id = dp.institution_id
INNER JOIN data_mart.dim_date dd ON
fcs.dim_date_id = dd.dim_date_id
INNER JOIN data_mart.dim_completion_window dcw ON
fcs.dim_completion_window_id = dcw.dim_completion_window_id
AND fcs.institution_id = dcw.institution_id
AND fcs.civ_career = dcw.civ_career
WHERE 1 = 1
AND dcw.is_active = 1
AND dd.dt >= CURRENT_DATE - INTERVAL '12 weeks' -- Last 12 weeks
-- Optional filters:
-- AND dp.institution_person_id = '<STUDENT_ID>' -- Specific student
-- AND fcs.civ_career = 'U' -- Undergraduate only
-- AND dcw.window_length = 4 -- 4-year completion window
GROUP BY
EXTRACT(WEEK FROM dd.dt)
, EXTRACT(YEAR FROM dd.dt)
, DATE_TRUNC('week', dd.dt)
, dp.institution_person_id
, fcs.civ_career
, dcw.window_length
)
SELECT
week_year
, week_start_date
, institution_person_id
, civ_career
, score_term
, window_length
, weekly_avg_completion_score
, score_count AS pred_per_week
-- Calculate week-over-week change
, LAG(weekly_avg_completion_score, 1) OVER (
PARTITION BY institution_person_id, civ_career, window_length
ORDER BY week_start_date
) AS previous_week_score
, weekly_avg_completion_score - LAG(weekly_avg_completion_score, 1) OVER (
PARTITION BY institution_person_id, civ_career, window_length
ORDER BY week_start_date
) AS score_change_from_previous_week
-- Classify completion likelihood
, CASE
WHEN weekly_avg_completion_score >= 0.9 THEN 'Very High'
WHEN weekly_avg_completion_score >= 0.7 THEN 'High'
WHEN weekly_avg_completion_score >= 0.5 THEN 'Moderate'
WHEN weekly_avg_completion_score >= 0.2 THEN 'Low'
ELSE 'Very Low'
END AS completion_likelihood
-- Flag significant changes (> 10% change)
, CASE
WHEN ABS(
weekly_avg_completion_score - LAG(weekly_avg_completion_score, 1) OVER (
PARTITION BY institution_person_id, civ_career, window_length
ORDER BY week_start_date
)) > 0.1 THEN 'Significant Change'
ELSE 'Stable'
END AS trend_flag
FROM weekly_scores
ORDER BY
week_start_date DESC
, institution_person_id ASC
, civ_career ASC;

Student Completion Predictions

Specifying <STUDENT_ID> returns the latest 4-year completion prediction for the student. Enrollment flag and actual completion outcome can also be obtained by joining with other tables in the data warehouse.

-- Find the terms a student has an active enrollment in for each of their careers (academic levels).
WITH student_enrollment AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id AS civ_career
, ds.civ_term_id
, de.civ_is_enrolled
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de USING (dim_enrollment_id)
INNER JOIN data_mart.dim_section ds USING (dim_section_id)
WHERE
de.dim_enrollment_id != 'null_row'
AND ds.dim_section_id != 'null_row'
AND de.civ_is_enrolled = 1
)
-- Find the earliest term the student has graduated in for each of their careers (academic levels).
, earliest_completion_term AS (
SELECT
dp.raw_person_id
, dct.civ_career_id AS civ_career
, MIN(dct.civ_career_term_id) AS completion_career_term_id
FROM data_mart.fact_graduation fg
INNER JOIN data_mart.dim_person dp USING (dim_person_id)
INNER JOIN data_mart.dim_career_term dct USING (dim_career_term_id)
WHERE
fg.civ_is_award_conferred = 1
GROUP BY dp.raw_person_id, dct.civ_career_id
)
SELECT
fcsl.event_ts AS score_date
, fcsl.civ_term_id AS score_term
, fcsl.civ_career
, dp.institution_person_id
, fcsl.prediction
, COALESCE(se.civ_is_enrolled, 0) AS civ_is_enrolled
, CASE
WHEN ect.completion_career_term_id <= dcw.civ_end_term_sequence_no
THEN 1
ELSE 0
END AS civ_is_completed
FROM data_mart.fact_completion_scores_latest fcsl
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_completion_window dcw USING (dim_completion_window_id, institution_id, civ_career)
LEFT JOIN student_enrollment se USING (raw_person_id, civ_career, civ_term_id)
LEFT JOIN earliest_completion_term ect USING (raw_person_id, civ_career)
WHERE
fcsl.window_length = 4 -- Adjust this for the graduation window you would like to use (4 year, 5 year, etc.). To see your configured windows: SELECT DISTINCT window_length FROM data_mart.dim_completion_window;
AND dp.institution_person_id = '<STUDENT_ID>';