Persistence + Completion Prediction Queries

Student Predictions by Credit Load

Analyzes currently enrolled students by career and credit load categories, providing counts and average persistence/completion scores for each group. Credit loads are categorized as part-time or full-time based on total credits:

  • Undergraduate students (Full-time >= 12 credits)
  • Graduate students (Full-time >= 9 credits)
WITH active_enrolled_students AS (
-- Get currently enrolled students in active terms
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
, de.raw_credits
, dct.civ_term_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
ds.raw_term_id = dct.raw_term_id
AND de.civ_career_id = dct.civ_career_id
WHERE
de.civ_is_enrolled = 1 -- Currently enrolled
AND dct.is_active = 1 -- Active term
AND de.raw_career_id <> 'Not Available'
AND de.raw_credits IS NOT NULL
)
, student_credit_load AS (
-- Aggregate total credit load per student
SELECT
aes.raw_person_id
, aes.civ_career_id
, aes.civ_term_id
, SUM(aes.raw_credits) AS total_credits
, CASE
-- Undergraduate students (Full-time >= 12 credits)
WHEN aes.civ_career_id = 'U' THEN
CASE
WHEN SUM(aes.raw_credits) >= 12 THEN 'Full-time'
ELSE 'Part-time'
END
-- Graduate students (Full-time >= 9 credits)
WHEN aes.civ_career_id = 'G' THEN
CASE
WHEN SUM(aes.raw_credits) >= 9 THEN 'Full-time'
ELSE 'Part-time'
END
-- Default for other career types.
-- Only 'U' (Undergraduate) and 'G' (Graduate) are explicitly handled above.
-- Any other value for civ_career_id will be classified as 'Unknown', which may indicate
-- an unrecognized, unsupported, or data quality issue with the career type.
ELSE 'Unknown'
END AS credit_load
FROM active_enrolled_students aes
GROUP BY
aes.raw_person_id
, aes.civ_career_id
, aes.civ_term_id
)
, latest_persistence_scores AS (
-- Get the latest persistence score for each student
SELECT
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.prediction AS persistence_score
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
ORDER BY
fcs.event_ts DESC
) = 1
)
, latest_completion_scores AS (
-- Get the latest completion score for each student
SELECT
dp.raw_person_id
, fcsl.civ_career
, fcsl.civ_term_id
, fcsl.prediction AS completion_score
FROM data_mart.fact_completion_scores_latest fcsl
INNER JOIN data_mart.dim_person dp ON
fcsl.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_completion_window dcw ON
fcsl.dim_completion_window_id = dcw.dim_completion_window_id
WHERE
dcw.is_active = 1
)
-- Final results with career, credit load, and average scores
SELECT
scl.civ_career_id AS career
, scl.credit_load
, COUNT(DISTINCT scl.raw_person_id) AS student_count
, ROUND(AVG(lps.persistence_score), 4) AS avg_persistence_score
, ROUND(AVG(lcs.completion_score), 4) AS avg_completion_score
FROM student_credit_load scl
INNER JOIN latest_persistence_scores lps ON
scl.raw_person_id = lps.raw_person_id
AND scl.civ_career_id = lps.civ_career
AND scl.civ_term_id = lps.civ_term_id
LEFT JOIN latest_completion_scores lcs ON
scl.raw_person_id = lcs.raw_person_id
AND scl.civ_career_id = lcs.civ_career
AND scl.civ_term_id = lcs.civ_term_id
GROUP BY
scl.civ_career_id
, scl.credit_load
ORDER BY
scl.civ_career_id
, scl.credit_load;