Persistence and Completion Prediction Queries

Average Persistence and Completion Scores by Origin

This query analyzes average persistence and completion prediction scores across student populations by geographic origin (Domestic, International, and Unknown). It provides insights into how students from different backgrounds perform in terms of continuing their studies and completing their degrees, helping identify potential support needs for specific student populations.

Key Metrics:

  • Persistence Score: Likelihood of continuing to the next term
  • Completion Score: Likelihood of graduating within expected timeframe
  • Student Count: Number of active students in each category
WITH base AS (
-- Source data for both persistence and completion metrics
SELECT
dp.raw_person_id
, dp.institution_person_id
, dp.raw_country_of_origin
, fcs.prediction AS persistence_prediction
, fcs.civ_career
, fcs.event_ts AS persistence_event_ts
FROM data_mart.fact_continuation_scores AS fcs
INNER JOIN data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
)
, persistence AS (
-- Get the latest persistence prediction for each student
SELECT
raw_person_id
, institution_person_id
, raw_country_of_origin
, persistence_prediction AS latest_persistence_prediction
, civ_career
FROM base
WHERE 1 = 1
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
institution_person_id
ORDER BY
persistence_event_ts DESC
) = 1
)
, completion AS (
-- Get the latest completion prediction for each student
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcsl.prediction AS latest_completion_prediction
, fcsl.civ_career
FROM data_mart.fact_completion_scores_latest AS fcsl
INNER JOIN data_mart.dim_person AS dp
ON fcsl.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_completion_window AS dcw
ON fcsl.dim_completion_window_id = dcw.dim_completion_window_id
AND fcsl.institution_id = dcw.institution_id
AND fcsl.civ_career = dcw.civ_career
WHERE dcw.window_length = 4 -- Standard 4-year completion window
)
, active_students AS (
-- Filter for students with active enrollments
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
FROM data_mart.dim_enrollment AS de
INNER JOIN data_mart.dim_section AS ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term AS dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
AND de.civ_is_enrolled = 1
)
, hybrid AS (
-- Combine persistence and completion data
SELECT
p.raw_person_id
, p.institution_person_id
, p.raw_country_of_origin
, p.latest_persistence_prediction
, c.latest_completion_prediction
, p.civ_career
FROM persistence AS p
INNER JOIN active_students AS acts
ON p.raw_person_id = acts.raw_person_id
AND p.civ_career = acts.civ_career_id
LEFT JOIN completion AS c
ON p.raw_person_id = c.raw_person_id
AND p.civ_career = c.civ_career
)
-- Classify students and calculate averages for both persistence and completion
SELECT
CASE
WHEN h.raw_country_of_origin IN (
'American', 'USA', 'United States', 'US', 'U.S.A.', 'U.S.', 'America'
) THEN 'Domestic'
WHEN h.raw_country_of_origin IN (
'Not Applicable'
, 'Not Available'
, 'Unknown'
, 'N/A'
, 'NA'
, 'None'
, 'Unspecified'
, ''
) OR h.raw_country_of_origin IS NULL THEN 'Unknown'
ELSE 'International'
END AS country_of_origin
, AVG(h.latest_persistence_prediction) AS avg_persistence_score
, AVG(h.latest_completion_prediction) AS avg_completion_score
, COUNT(*) AS student_count
FROM hybrid AS h
GROUP BY
country_of_origin
ORDER BY
country_of_origin;

Average Persistence and Completion Scores by GPA Band

This query examines average persistence and completion prediction scores across different GPA bands, providing insights into how academic performance correlates with student success outcomes. The analysis categorizes students into performance likelihood levels and helps identify at-risk populations based on their academic standing.

Key Metrics:

  • Persistence Score: Likelihood of continuing to the next term
  • Completion Score: Likelihood of graduating within expected timeframe
  • Performance Likelihood: Risk categorization (Very High, High, Moderate, Low, Very Low)
  • Student Count: Number of students in each GPA band
WITH base AS (
-- Get the latest persistence prediction for each student
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcs.prediction AS persistence_prediction
, fcs.event_ts AS persistence_event_ts
, de.civ_career_id
FROM data_mart.fact_continuation_scores AS fcs
INNER JOIN data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_enrollment AS de
ON dp.raw_person_id = de.raw_person_id
WHERE de.civ_career_id IN ('U', 'G')
)
, persistence AS (
SELECT
raw_person_id
, institution_person_id
, persistence_prediction AS latest_persistence_prediction
, civ_career_id
FROM base
WHERE 1 = 1
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
institution_person_id
ORDER BY
persistence_event_ts DESC
) = 1
)
, completion AS (
-- Get the latest completion prediction for each student
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcsl.prediction AS latest_completion_prediction
, fcsl.civ_career
FROM data_mart.fact_completion_scores_latest AS fcsl
INNER JOIN data_mart.dim_person AS dp
ON fcsl.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_completion_window AS dcw
ON fcsl.dim_completion_window_id = dcw.dim_completion_window_id
AND fcsl.institution_id = dcw.institution_id
AND fcsl.civ_career = dcw.civ_career
WHERE dcw.window_length = 4
)
, gpa_data AS (
-- Map students to GPA bands based on their raw GPA values
SELECT
dp.institution_person_id
, CASE
WHEN fpg.raw_gpa >= 0.00 AND fpg.raw_gpa < 0.50 THEN '0.0-0.5'
WHEN fpg.raw_gpa >= 0.50 AND fpg.raw_gpa < 1.00 THEN '0.5-1.0'
WHEN fpg.raw_gpa >= 1.00 AND fpg.raw_gpa < 1.50 THEN '1.0-1.5'
WHEN fpg.raw_gpa >= 1.50 AND fpg.raw_gpa < 2.00 THEN '1.5-2.0'
WHEN fpg.raw_gpa >= 2.00 AND fpg.raw_gpa < 2.50 THEN '2.0-2.5'
WHEN fpg.raw_gpa >= 2.50 AND fpg.raw_gpa < 3.00 THEN '2.5-3.0'
WHEN fpg.raw_gpa >= 3.00 AND fpg.raw_gpa < 3.50 THEN '3.0-3.5'
WHEN fpg.raw_gpa >= 3.50 AND fpg.raw_gpa < 4.00 THEN '3.5-4.0'
WHEN fpg.raw_gpa >= 4.00 THEN '>= 4.0'
END AS gpa_band
FROM data_mart.dim_person AS dp
INNER JOIN data_mart.fact_provided_gpa AS fpg
ON dp.dim_person_id = fpg.dim_person_id
WHERE 1 = 1
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
dp.institution_person_id
ORDER BY
fpg.event_ts DESC
) = 1
)
, hybrid AS (
-- Combine persistence and completion with GPA data
SELECT
p.civ_career_id
, gd.gpa_band
, p.latest_persistence_prediction
, c.latest_completion_prediction
, p.institution_person_id
FROM persistence AS p
INNER JOIN gpa_data AS gd
ON p.institution_person_id = gd.institution_person_id
LEFT JOIN completion AS c
ON p.raw_person_id = c.raw_person_id
AND p.civ_career_id = c.civ_career
WHERE p.raw_person_id IN (
-- Filter for active enrollments
SELECT DISTINCT de.raw_person_id
FROM data_mart.dim_enrollment AS de
INNER JOIN data_mart.dim_section AS ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term AS dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
AND de.civ_career_id IN ('U', 'G')
AND de.civ_is_enrolled = 1
)
)
, plan_stats AS (
-- Calculate average predictions and student count per GPA band
SELECT
civ_career_id
, gpa_band
, AVG(latest_persistence_prediction) AS avg_persistence_per_band
, AVG(latest_completion_prediction) AS avg_completion_per_band
, COUNT(DISTINCT institution_person_id) AS student_count
FROM hybrid
GROUP BY
gpa_band
, civ_career_id
)
SELECT
ps.civ_career_id
, ps.gpa_band
, ps.student_count
, TO_CHAR((ps.avg_persistence_per_band) * 100, 'FM999.00')
|| '%' AS avg_persistence_prediction
, TO_CHAR((ps.avg_completion_per_band) * 100, 'FM999.00')
|| '%' AS avg_completion_prediction
, CASE
WHEN ps.avg_persistence_per_band >= 0.90 THEN 'Very High'
WHEN ps.avg_persistence_per_band >= 0.70 THEN 'High'
WHEN ps.avg_persistence_per_band >= 0.50 THEN 'Moderate'
WHEN ps.avg_persistence_per_band >= 0.20 THEN 'Low'
WHEN ps.avg_persistence_per_band < 0.20 THEN 'Very Low'
END AS persistence_likelihood
, CASE
WHEN ps.avg_completion_per_band >= 0.90 THEN 'Very High'
WHEN ps.avg_completion_per_band >= 0.70 THEN 'High'
WHEN ps.avg_completion_per_band >= 0.50 THEN 'Moderate'
WHEN ps.avg_completion_per_band >= 0.20 THEN 'Low'
WHEN ps.avg_completion_per_band < 0.20 THEN 'Very Low'
END AS completion_likelihood
FROM plan_stats AS ps
ORDER BY
ps.gpa_band DESC
, ps.civ_career_id;

Average Persistence and Completion Scores by Term

This query analyzes student success metrics by calculating average persistence and completion prediction scores at the term level. It provides insights into how students are performing across different time periods and can be easily modified to show course-level detail when needed.

Key Metrics:

  • Persistence Score: Likelihood of continuing to the next term
  • Completion Score: Likelihood of graduating within expected timeframe
  • Student Count: Total number of students enrolled in each term
  • Completion Data Coverage: Number of students with available completion predictions

Reporting Levels:

  • Default: Term-level aggregation (high-level summary showing 10 most recent terms)
  • Detailed: Course-level breakdown (uncomment lines marked -- COURSE_DETAIL for granular analysis)

To switch from Term-level to Course-level detail:

  • Find all 5 lines marked with -- COURSE_DETAIL
  • Uncomment them by removing the --
  • This will add course_id and course_title columns, plus appropriate grouping and sorting

Data Scope: Shows actively enrolled students only, using latest prediction scores per student per term.

WITH persistence_scores AS (
-- Latest persistence prediction per student per term
SELECT
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.prediction AS persistence_prediction
FROM data_mart.fact_continuation_scores AS fcs
INNER JOIN data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
WHERE fcs.event_ts = (
SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores AS fcs1
WHERE fcs1.dim_person_id = fcs.dim_person_id
AND fcs1.civ_career = fcs.civ_career
AND fcs1.civ_term_id = fcs.civ_term_id
)
AND fcs.prediction IS NOT NULL
)
, completion_scores AS (
-- Latest completion prediction per student (career-level)
SELECT
dp.raw_person_id
, fcsl.civ_career
, fcsl.prediction AS completion_prediction
FROM data_mart.fact_completion_scores_latest AS fcsl
INNER JOIN data_mart.dim_person AS dp
ON fcsl.dim_person_id = dp.dim_person_id
WHERE fcsl.window_length = 4
AND fcsl.prediction IS NOT NULL
)
, student_enrollments AS (
-- Connect students to their courses and terms
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
, ds.civ_term_id
, ds.civ_course_id
, ds.raw_title AS course_title
, ds.raw_campus_id
, ds.raw_institution_id
, ds.raw_department_id
, dc.raw_subject
, dc.raw_catalog_nbr
FROM data_mart.fact_enroll AS fe
INNER JOIN data_mart.dim_enrollment AS de
ON fe.dim_enrollment_id = de.dim_enrollment_id
INNER JOIN data_mart.dim_section AS ds
ON fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_course AS dc
ON ds.raw_course_id = dc.raw_course_id
WHERE de.civ_is_enrolled = 1
)
, scores_by_term_course AS (
-- Combine all data: enrollments + persistence + completion
SELECT
se.civ_term_id
, se.civ_course_id
, se.course_title
, se.raw_campus_id
, se.raw_institution_id
, se.raw_department_id
, se.raw_subject
, se.raw_catalog_nbr
, se.raw_person_id
, ps.persistence_prediction
, cs.completion_prediction
FROM student_enrollments AS se
INNER JOIN persistence_scores AS ps
ON se.raw_person_id = ps.raw_person_id
AND se.civ_career_id = ps.civ_career
AND se.civ_term_id = ps.civ_term_id
LEFT JOIN completion_scores AS cs
ON se.raw_person_id = cs.raw_person_id
AND se.civ_career_id = cs.civ_career
)
SELECT
sbtc.civ_term_id
-- COURSE_DETAIL: Uncomment next 2 lines for course-level breakdown
--, sbtc.civ_course_id AS course_id
--, sbtc.course_title
, AVG(sbtc.persistence_prediction) AS avg_persistence_score
, AVG(sbtc.completion_prediction) AS avg_completion_score
, COUNT(DISTINCT sbtc.raw_person_id) AS student_count
, COUNT(DISTINCT CASE
WHEN sbtc.completion_prediction IS NOT NULL
THEN sbtc.raw_person_id
END) AS students_with_completion_data
FROM scores_by_term_course AS sbtc
WHERE 1 = 1
--AND sbtc.raw_campus_id = '<CAMPUS_ID>'
--AND sbtc.raw_institution_id = '<INSTITUTION_ID>'
--AND sbtc.raw_department_id = '<DEPARTMENT_ID>'
--AND sbtc.raw_subject = '<SUBJECT>'
--AND sbtc.raw_catalog_nbr = '<CATALOG_NBR>'
GROUP BY
sbtc.civ_term_id
-- COURSE_DETAIL: Uncomment next 2 lines for course-level grouping
--, sbtc.civ_course_id
--, sbtc.course_title
ORDER BY
sbtc.civ_term_id DESC
-- COURSE_DETAIL: Uncomment next line for course-level sorting
--, sbtc.civ_course_id ASC
LIMIT 10;

Dropped Students with Low Persistence and Completion Scores

This query identifies students who have recently dropped at least one course in the active term and exhibit both low persistence and completion prediction scores. It focuses on high-risk students with scores below 0.5 (adjustable threshold) to prioritize intervention efforts and support resources for those most likely to struggle academically.

Key Metrics:

  • Persistence Score: Likelihood of continuing to the next term (< 0.5)
  • Completion Score: Likelihood of graduating within expected timeframe (< 0.5)
  • Days Enrolled: Duration of enrollment before dropping
  • Drop Date: When the student withdrew from the course

Note: The completion window is configurable. Change window_length = 4 to analyze different graduation timeframes. To see available windows: SELECT DISTINCT window_length FROM data_mart.dim_completion_window.

WITH persistence AS (
SELECT
fcs.dim_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.event_ts
, fcs.prediction AS persistence_prediction
FROM data_mart.fact_continuation_scores AS fcs
WHERE fcs.event_ts = (
SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores AS fcs1
WHERE
fcs.dim_person_id = fcs1.dim_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id
)
AND fcs.prediction < 0.5 -- Low persistence threshold
)
, completion AS (
-- Get completion predictions for context on dropped students
SELECT
fcsl.dim_person_id
, fcsl.civ_career
, fcsl.civ_term_id
, fcsl.prediction AS completion_prediction
FROM data_mart.fact_completion_scores_latest AS fcsl
INNER JOIN data_mart.dim_completion_window AS dcw
ON fcsl.dim_completion_window_id = dcw.dim_completion_window_id
AND fcsl.institution_id = dcw.institution_id
AND fcsl.civ_career = dcw.civ_career
WHERE
dcw.window_length = 4
AND fcsl.prediction < 0.5 -- Low completion threshold
)
SELECT DISTINCT
dp.raw_person_id AS person_id
, ds.civ_term_id
, de.raw_drop_dt
, de.raw_drop_dt - de.raw_add_dt AS days_enrolled
, ds.civ_course_id
, ds.raw_title AS course_title
, p.persistence_prediction
, c.completion_prediction
FROM data_mart.fact_enroll AS fe
INNER JOIN data_mart.dim_person AS dp
ON dp.dim_person_id = fe.dim_person_id
INNER JOIN data_mart.dim_section AS ds
ON fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_enrollment AS de
ON fe.dim_enrollment_id = de.dim_enrollment_id
AND de.raw_person_id = dp.raw_person_id
INNER JOIN data_mart.dim_career_term AS dct
ON dct.civ_term_id = ds.civ_term_id
INNER JOIN persistence AS p
ON p.dim_person_id = dp.dim_person_id
AND p.civ_term_id = ds.civ_term_id
LEFT JOIN completion AS c
ON c.dim_person_id = dp.dim_person_id
AND c.civ_term_id = ds.civ_term_id
AND c.civ_career = p.civ_career
WHERE
de.raw_drop_dt >= dct.civ_career_term_start_dt
AND de.raw_drop_dt <= dct.civ_career_term_end_dt
AND dct.is_active = 1
ORDER BY
de.raw_drop_dt DESC
, dp.raw_person_id;

Student Predictions by Credit Load

Analyzes currently enrolled students by career and credit load categories, providing counts and average persistence/completion scores for each group. Credit loads are categorized as part-time or full-time based on total credits:

  • Undergraduate students (Full-time >= 12 credits)
  • Graduate students (Full-time >= 9 credits)
WITH active_enrolled_students AS (
-- Get currently enrolled students in active terms
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
, de.raw_credits
, dct.civ_term_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
ds.raw_term_id = dct.raw_term_id
AND de.civ_career_id = dct.civ_career_id
WHERE
de.civ_is_enrolled = 1 -- Currently enrolled
AND dct.is_active = 1 -- Active term
AND de.raw_career_id <> 'Not Available'
AND de.raw_credits IS NOT NULL
)
, student_credit_load AS (
-- Aggregate total credit load per student
SELECT
aes.raw_person_id
, aes.civ_career_id
, aes.civ_term_id
, SUM(aes.raw_credits) AS total_credits
, CASE
-- Undergraduate students (Full-time >= 12 credits)
WHEN aes.civ_career_id = 'U' THEN
CASE
WHEN SUM(aes.raw_credits) >= 12 THEN 'Full-time'
ELSE 'Part-time'
END
-- Graduate students (Full-time >= 9 credits)
WHEN aes.civ_career_id = 'G' THEN
CASE
WHEN SUM(aes.raw_credits) >= 9 THEN 'Full-time'
ELSE 'Part-time'
END
-- Default for other career types.
-- Only 'U' (Undergraduate) and 'G' (Graduate) are explicitly handled above.
-- Any other value for civ_career_id will be classified as 'Unknown', which may indicate
-- an unrecognized, unsupported, or data quality issue with the career type.
ELSE 'Unknown'
END AS credit_load
FROM active_enrolled_students aes
GROUP BY
aes.raw_person_id
, aes.civ_career_id
, aes.civ_term_id
)
, latest_persistence_scores AS (
-- Get the latest persistence score for each student
SELECT
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.prediction AS persistence_score
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
ORDER BY
fcs.event_ts DESC
) = 1
)
, latest_completion_scores AS (
-- Get the latest completion score for each student
SELECT
dp.raw_person_id
, fcsl.civ_career
, fcsl.civ_term_id
, fcsl.prediction AS completion_score
FROM data_mart.fact_completion_scores_latest fcsl
INNER JOIN data_mart.dim_person dp ON
fcsl.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_completion_window dcw ON
fcsl.dim_completion_window_id = dcw.dim_completion_window_id
WHERE
dcw.is_active = 1
)
-- Final results with career, credit load, and average scores
SELECT
scl.civ_career_id AS career
, scl.credit_load
, COUNT(DISTINCT scl.raw_person_id) AS student_count
, ROUND(AVG(lps.persistence_score), 4) AS avg_persistence_score
, ROUND(AVG(lcs.completion_score), 4) AS avg_completion_score
FROM student_credit_load scl
INNER JOIN latest_persistence_scores lps ON
scl.raw_person_id = lps.raw_person_id
AND scl.civ_career_id = lps.civ_career
AND scl.civ_term_id = lps.civ_term_id
LEFT JOIN latest_completion_scores lcs ON
scl.raw_person_id = lcs.raw_person_id
AND scl.civ_career_id = lcs.civ_career
AND scl.civ_term_id = lcs.civ_term_id
GROUP BY
scl.civ_career_id
, scl.credit_load
ORDER BY
scl.civ_career_id
, scl.credit_load;

Average Persistence and Completion Scores by Transfer Credit Band

This query analyzes the impact of transfer credits on student completion and persistence prediction scores by examining different transfer credit bands for active students who have not yet graduated. It provides insights into how transfer credit volume affects both completion and persistence likelihood among current students, helping optimize articulation agreements and transfer pathways to improve student outcomes and reduce time to degree.

Target Population: Active students with completion predictions who have not yet been awarded a degree

Key Metrics:

  • Completion Score: Likelihood of graduating within the configured completion window (currently set to 4 years)
  • Persistence Score: Likelihood of continuing to the next term
  • Transfer Credit Bands: Grouped by volume (No Transfer, 1-14, 15-29, 30-59, 60+ credits)
  • Completion Likelihood: Risk categorization (Very High, High, Moderate, Low, Very Low)
  • Student Count: Number of active students in each transfer credit band (minimum 10 students per band)

Note: The completion window is configurable. Change window_length = 4 to analyze different graduation timeframes. To see available windows: SELECT DISTINCT window_length FROM data_mart.dim_completion_window

WITH transfer_students AS (
SELECT DISTINCT
ftc.dim_person_id
, SUM(ftc.raw_credit_hours) AS total_transfer_credits
, COUNT(DISTINCT ftc.dim_course_id) AS transfer_courses_count
, MIN(ftc.event_ts) AS first_transfer_date
FROM data_mart.fact_transfer_course AS ftc
WHERE ftc.raw_credit_hours IS NOT NULL
GROUP BY
ftc.dim_person_id
)
, active_students_with_predictions AS (
SELECT
fcsl.dim_person_id
, fcsl.prediction AS completion_prediction
, fcsl.window_length
, fcsl.civ_start_term_sequence_no
, fcsl.event_ts AS prediction_date
, fcsl.institution_id
, fcsl.civ_career
FROM data_mart.fact_completion_scores_latest AS fcsl
INNER JOIN data_mart.dim_completion_window AS dcw ON
fcsl.dim_completion_window_id = dcw.dim_completion_window_id
WHERE fcsl.prediction IS NOT NULL
AND dcw.is_active = 1
)
, persistence_scores AS (
SELECT
fcs.dim_person_id
, fcs.prediction AS persistence_prediction
, fcs.civ_career
FROM data_mart.fact_continuation_scores AS fcs
WHERE fcs.event_ts = (
SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores AS fcs1
WHERE fcs.dim_person_id = fcs1.dim_person_id
AND fcs.civ_career = fcs1.civ_career
)
)
, student_graduation_status AS (
SELECT DISTINCT
dim_person_id
FROM data_mart.fact_graduation
WHERE civ_is_award_conferred = 1
AND event_ts >= '2000-01-01'
AND event_ts <= CURRENT_DATE
)
SELECT
CASE
WHEN ts.total_transfer_credits >= 60 THEN '60+ Credits'
WHEN ts.total_transfer_credits >= 30 THEN '30-59 Credits'
WHEN ts.total_transfer_credits >= 15 THEN '15-29 Credits'
WHEN ts.total_transfer_credits > 0 THEN '1-14 Credits'
ELSE 'No Transfer Credits'
END AS transfer_credit_band
, COUNT(DISTINCT asp.dim_person_id) AS total_students
, CAST(ROUND(AVG(CASE
WHEN ts.total_transfer_credits IS NOT NULL
THEN ts.total_transfer_credits
END), 2) AS DECIMAL(10,2)) AS avg_transfer_credits
, CAST(ROUND(AVG(asp.completion_prediction) * 100, 2) AS DECIMAL(10,2)) AS avg_student_completion_prediction_score
, CAST(ROUND(AVG(ps.persistence_prediction) * 100, 2) AS DECIMAL(10,2)) AS avg_student_persistence_prediction_score
-- Standard absolute thresholds for completion_likelihood
, CASE
WHEN ROUND(AVG(asp.completion_prediction) * 100, 2) >= 90 THEN 'Very High'
WHEN ROUND(AVG(asp.completion_prediction) * 100, 2) >= 70 THEN 'High'
WHEN ROUND(AVG(asp.completion_prediction) * 100, 2) >= 50 THEN 'Moderate'
WHEN ROUND(AVG(asp.completion_prediction) * 100, 2) >= 20 THEN 'Low'
ELSE 'Very Low'
END AS completion_likelihood
, CAST(ROUND(MIN(asp.completion_prediction) * 100, 2) AS DECIMAL(10,2)) AS min_student_completion_prediction_score
, CAST(ROUND(MAX(asp.completion_prediction) * 100, 2) AS DECIMAL(10,2)) AS max_student_completion_prediction_score
FROM active_students_with_predictions AS asp
LEFT JOIN transfer_students AS ts ON
asp.dim_person_id = ts.dim_person_id
LEFT JOIN persistence_scores AS ps ON
asp.dim_person_id = ps.dim_person_id
AND asp.civ_career = ps.civ_career
LEFT JOIN student_graduation_status AS sgs ON
asp.dim_person_id = sgs.dim_person_id
WHERE sgs.dim_person_id IS NULL
GROUP BY
CASE
WHEN ts.total_transfer_credits >= 60 THEN '60+ Credits'
WHEN ts.total_transfer_credits >= 30 THEN '30-59 Credits'
WHEN ts.total_transfer_credits >= 15 THEN '15-29 Credits'
WHEN ts.total_transfer_credits > 0 THEN '1-14 Credits'
ELSE 'No Transfer Credits'
END
HAVING COUNT(DISTINCT asp.dim_person_id) >= 10
ORDER BY
MIN(CASE
WHEN ts.total_transfer_credits >= 60 THEN 1
WHEN ts.total_transfer_credits >= 30 THEN 2
WHEN ts.total_transfer_credits >= 15 THEN 3
WHEN ts.total_transfer_credits > 0 THEN 4
ELSE 5
END);

Financial Aid Impact Analysis

This query analyzes the effectiveness of different financial aid types on student persistence outcomes. It compares persistence and completion scores across various aid categories and first-generation status. The query helps optimize aid distribution based on demonstrated student success impact.

WITH latest_term AS (
-- Pick the latest civ_term_id with active career within fact_continuation_scores
SELECT
dct.civ_term_id
, dct.civ_career_term_start_dt
, dct.civ_career_term_end_dt
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_career_term dct ON
fcs.civ_term_id = dct.civ_term_id
WHERE fcs.civ_career IN ('U', 'G')
ORDER BY fcs.event_ts DESC
LIMIT 1
)
, students_per_aid_type AS (
SELECT
dp.institution_person_id
, dat.civ_aid_category
, fa.raw_disbursed_amount
, dp.civ_first_to_attend_college
FROM data_mart.fact_aid fa
INNER JOIN data_mart.dim_aid_type dat ON
fa.dim_aid_type_id = dat.dim_aid_type_id
INNER JOIN data_mart.dim_person dp ON
fa.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_career_term dct ON
fa.dim_career_term_id = dct.dim_career_term_id
INNER JOIN latest_term lt ON
dct.civ_term_id = lt.civ_term_id
)
, latest_prediction_per_student AS (
-- Pick latest prediction per student on latest term available
SELECT
dp.institution_person_id
, fcs.prediction AS latest_prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
WHERE dp.raw_person_id
IN (SELECT DISTINCT de.raw_person_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
ds.raw_term_id = dct.raw_term_id
WHERE dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
AND de.civ_career_id IN ('U', 'G') -- Limit to undergraduate and graduate
AND de.civ_is_enrolled = 1)
QUALIFY ROW_NUMBER()
OVER (PARTITION BY dp.institution_person_id ORDER BY fcs.event_ts DESC) = 1
ORDER BY dp.institution_person_id
)
, latest_completion_per_student AS (
-- Pick latest completion prediction per student
SELECT
dp.institution_person_id
, fcsl.prediction AS latest_completion_prediction
FROM data_mart.fact_completion_scores fcsl
INNER JOIN data_mart.dim_person dp ON
fcsl.dim_person_id = dp.dim_person_id
WHERE dp.raw_person_id
IN (SELECT DISTINCT de.raw_person_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
ds.raw_term_id = dct.raw_term_id
WHERE dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
AND de.civ_career_id IN ('U', 'G')
AND de.civ_is_enrolled = 1)
QUALIFY ROW_NUMBER()
OVER (PARTITION BY dp.institution_person_id ORDER BY fcsl.event_ts DESC) = 1
)
, prediction_aid_type_student AS (
SELECT
spat.civ_aid_category
, spat.institution_person_id
, spat.civ_first_to_attend_college
, lpps.latest_prediction
, lcps.latest_completion_prediction
, SUM(spat.raw_disbursed_amount) AS total_aid_amount
FROM students_per_aid_type spat
INNER JOIN latest_prediction_per_student lpps ON
spat.institution_person_id = lpps.institution_person_id
LEFT JOIN latest_completion_per_student lcps ON
spat.institution_person_id = lcps.institution_person_id
GROUP BY
spat.civ_aid_category
, spat.institution_person_id
, spat.civ_first_to_attend_college
, lpps.latest_prediction
, lcps.latest_completion_prediction
)
, prediction_aid_type_group_student AS (
SELECT
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped')
THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant'
THEN 'Other Grant'
ELSE pats.civ_aid_category
END AS aid_category
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END AS student_type
, pats.institution_person_id
, pats.latest_prediction
, pats.latest_completion_prediction
, pats.total_aid_amount
, AVG(pats.latest_prediction) OVER (
PARTITION BY
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped') THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant' THEN 'Other Grant'
ELSE pats.civ_aid_category
END
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END
) AS avg_prediction_per_category
, AVG(pats.latest_completion_prediction) OVER (
PARTITION BY
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped') THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant' THEN 'Other Grant'
ELSE pats.civ_aid_category
END
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END
) AS avg_completion_per_category
, MIN(pats.latest_prediction) OVER (
PARTITION BY
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped') THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant' THEN 'Other Grant'
ELSE pats.civ_aid_category
END
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END
) AS min_prediction_per_category
, MAX(pats.latest_prediction) OVER (
PARTITION BY
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped') THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant' THEN 'Other Grant'
ELSE pats.civ_aid_category
END
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END
) AS max_prediction_per_category
, COUNT(pats.institution_person_id) OVER (
PARTITION BY
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped') THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant' THEN 'Other Grant'
ELSE pats.civ_aid_category
END
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END
) AS student_count
, AVG(pats.total_aid_amount) OVER (
PARTITION BY
CASE
WHEN pats.civ_aid_category IN ('Other', 'unmapped') THEN 'Other Financial Aid'
WHEN pats.civ_aid_category = 'Grant' THEN 'Other Grant'
ELSE pats.civ_aid_category
END
, CASE
WHEN pats.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Continuing Generation'
END
) AS avg_aid_amount
FROM prediction_aid_type_student pats
)
SELECT
patgs.student_type
, patgs.aid_category
, TO_CHAR(MAX(patgs.avg_prediction_per_category) * 100, 'FM999.00') || '%' AS avg_persistence_score
, TO_CHAR(MAX(patgs.avg_completion_per_category) * 100, 'FM999.00') || '%' AS avg_completion_score
, CASE
WHEN MAX(patgs.avg_prediction_per_category) >= 0.90
THEN 'Very High'
WHEN MAX(patgs.avg_prediction_per_category) >= 0.70
THEN 'High'
WHEN MAX(patgs.avg_prediction_per_category) >= 0.50
THEN 'Moderate'
WHEN MAX(patgs.avg_prediction_per_category) >= 0.20
THEN 'Low'
WHEN MAX(patgs.avg_prediction_per_category) < 0.20
THEN 'Very Low'
END AS persistence_likelihood
, CASE
WHEN MAX(patgs.avg_completion_per_category) >= 0.90
THEN 'Very High'
WHEN MAX(patgs.avg_completion_per_category) >= 0.70
THEN 'High'
WHEN MAX(patgs.avg_completion_per_category) >= 0.50
THEN 'Moderate'
WHEN MAX(patgs.avg_completion_per_category) >= 0.20
THEN 'Low'
WHEN MAX(patgs.avg_completion_per_category) < 0.20
THEN 'Very Low'
END AS completion_likelihood
, MAX(patgs.student_count) AS student_count
, ROUND(MAX(patgs.avg_aid_amount), 2) AS avg_aid_amount
, ROUND(COUNT(CASE WHEN patgs.latest_prediction >= 0.7 THEN 1 END) * 100.0 / MAX(patgs.student_count), 2) AS high_persistence_percentage
, ROUND(COUNT(CASE WHEN patgs.latest_completion_prediction >= 0.7 THEN 1 END) * 100.0 / MAX(patgs.student_count), 2) AS high_completion_percentage
FROM prediction_aid_type_group_student patgs
GROUP BY patgs.student_type, patgs.aid_category
ORDER BY patgs.student_type, MAX(patgs.avg_prediction_per_category) DESC;

Student Predictions by Course

This query aggregates student persistence and completion prediction scores by course. It provides course-level analytics showing average predictions, student counts, and prediction distribution categories for enrolled students in active terms.

Optional: Modify career filters, prediction thresholds, and completion window_length as needed

WITH active_students AS (
-- Get students currently enrolled in active terms
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
, de.raw_section_id
, ds.raw_course_id
, ds.raw_subject
, ds.raw_catalog_nbr
, ds.raw_title
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND e.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1
)
, latest_persistence_scores AS (
-- Get the latest persistence prediction for each student
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcs.civ_career
, fcs.prediction AS persistence_prediction
, fcs.event_ts AS persistence_score_date
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp
ON fcs.dim_person_id = dp.dim_person_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY dp.institution_person_id, fcs.civ_career
ORDER BY fcs.event_ts DESC
) = 1
)
, latest_completion_scores AS (
-- Get the latest completion prediction for each student
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcsl.civ_career
, fcsl.prediction AS completion_prediction
, fcsl.event_ts AS completion_score_date
, fcsl.window_length
FROM data_mart.fact_completion_scores_latest fcsl
INNER JOIN data_mart.dim_person dp
ON fcsl.dim_person_id = dp.dim_person_id
)
, course_predictions AS (
-- Combine predictions with course information
SELECT
acts.raw_course_id
, acts.raw_subject
, acts.raw_catalog_nbr
, acts.raw_title
, acts.civ_career_id
, acts.raw_person_id
, lps.persistence_prediction
, lcs.completion_prediction
-- Categorize persistence likelihood
, CASE
WHEN lps.persistence_prediction >= 0.80 THEN 'Very High'
WHEN lps.persistence_prediction >= 0.60 THEN 'High'
WHEN lps.persistence_prediction >= 0.40 THEN 'Moderate'
WHEN lps.persistence_prediction >= 0.20 THEN 'Low'
WHEN lps.persistence_prediction < 0.20 THEN 'Very Low'
ELSE NULL
END AS persistence_category
-- Categorize completion likelihood
, CASE
WHEN lcs.completion_prediction >= 0.80 THEN 'Very High'
WHEN lcs.completion_prediction >= 0.60 THEN 'High'
WHEN lcs.completion_prediction >= 0.40 THEN 'Moderate'
WHEN lcs.completion_prediction >= 0.20 THEN 'Low'
WHEN lcs.completion_prediction < 0.20 THEN 'Very Low'
ELSE NULL
END AS completion_category
-- This will classify the completion window length based on the maximum window length
, lcs.window_length
, DENSE_RANK() OVER (ORDER BY lcs.window_length DESC) AS completion_window_length_rank
FROM active_students acts
LEFT JOIN latest_persistence_scores lps
ON acts.raw_person_id = lps.raw_person_id
AND acts.civ_career_id = lps.civ_career
LEFT JOIN latest_completion_scores lcs
ON acts.raw_person_id = lcs.raw_person_id
AND acts.civ_career_id = lcs.civ_career
WHERE
(lps.persistence_prediction IS NOT NULL OR lcs.completion_prediction IS NOT NULL)
GROUP BY
acts.raw_course_id
, acts.raw_subject
, acts.raw_catalog_nbr
, acts.raw_title
, acts.civ_career_id
, acts.raw_person_id
, lps.persistence_prediction
, lcs.completion_prediction
, lcs.window_length
)
-- Final aggregated results by course
SELECT
raw_subject + ' ' + raw_catalog_nbr AS course_code
, raw_title AS course_title
, civ_career_id AS career
-- Student counts
, COUNT(*) AS total_students
, COUNT(persistence_prediction) AS students_with_persistence_scores
, COUNT(completion_prediction) AS students_with_completion_scores
-- Persistence metrics
, AVG(persistence_prediction) AS avg_persistence_score
, MIN(persistence_prediction) AS min_persistence_score
, MAX(persistence_prediction) AS max_persistence_score
-- Completion metrics
, AVG(completion_prediction) AS avg_completion_score
, MIN(completion_prediction) AS min_completion_score
, MAX(completion_prediction) AS max_completion_score
-- Persistence distribution
, SUM(CASE WHEN persistence_category = 'Very High' THEN 1 ELSE 0 END) AS persistence_very_high_count
, SUM(CASE WHEN persistence_category = 'High' THEN 1 ELSE 0 END) AS persistence_high_count
, SUM(CASE WHEN persistence_category = 'Moderate' THEN 1 ELSE 0 END) AS persistence_moderate_count
, SUM(CASE WHEN persistence_category = 'Low' THEN 1 ELSE 0 END) AS persistence_low_count
, SUM(CASE WHEN persistence_category = 'Very Low' THEN 1 ELSE 0 END) AS persistence_very_low_count
-- Completion distribution
, SUM(CASE WHEN completion_category = 'Very High' THEN 1 ELSE 0 END) AS completion_very_high_count
, SUM(CASE WHEN completion_category = 'High' THEN 1 ELSE 0 END) AS completion_high_count
, SUM(CASE WHEN completion_category = 'Moderate' THEN 1 ELSE 0 END) AS completion_moderate_count
, SUM(CASE WHEN completion_category = 'Low' THEN 1 ELSE 0 END) AS completion_low_count
, SUM(CASE WHEN completion_category = 'Very Low' THEN 1 ELSE 0 END) AS completion_very_low_count
-- Risk indicators
, ROUND(
(SUM(CASE WHEN persistence_category IN ('Low', 'Very Low') THEN 1 ELSE 0 END) * 100.0)
/ NULLIF(COUNT(persistence_prediction), 0), 2
) AS persistence_at_risk_percentage
, ROUND(
(SUM(CASE WHEN completion_category IN ('Low', 'Very Low') THEN 1 ELSE 0 END) * 100.0)
/ NULLIF(COUNT(completion_prediction), 0), 2
) AS completion_at_risk_percentage
, CASE
WHEN completion_window_length_rank = 1 THEN 'Long'
WHEN completion_window_length_rank = 2 THEN 'Normal'
WHEN completion_window_length_rank = 3 THEN 'Short'
ELSE 'Unknown'
END AS completion_window_length
FROM course_predictions
WHERE completion_window_length IN ('Long') -- You can adjust or add additional completion window lengths
GROUP BY
raw_subject
, raw_catalog_nbr
, raw_title
, civ_career_id
, completion_window_length
HAVING
COUNT(*) >= 5 -- Only show courses with at least 5 students for statistical significance
ORDER BY
raw_subject
, raw_catalog_nbr
, civ_career_id;