Examples

Here are example queries to get you started. Feel free to copy these and change what you need.

Latest Student Feature Values

Specifying <STUDENT_ID> returns all the latest feature values for the student and the date they were calculated.

Note: Due to the different data types, the value will be in one of the text, int, float or bool columns.

SELECT
fcfl.event_ts AS score_date
, dp.institution_person_id
, dcf.civ_continuation_feature_id
, dcf.civ_continuation_feature_name
, dcf.civ_continuation_feature_description
, fcfl.text_value
, fcfl.int_value
, fcfl.float_value
, fcfl.bool_value
FROM data_mart.fact_continuation_feature_latest fcfl
INNER JOIN data_mart.dim_continuation_feature dcf USING (dim_continuation_feature_id, institution_id)
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_date dd USING (dim_date_id, institution_id)
WHERE
dp.institution_person_id = '<STUDENT_ID>'
ORDER BY dcf.civ_continuation_feature_id;

Student Continuation Predictions

This query fetches all historical continuation prediction scores, the institution ID, and the date the score was generated for desired <STUDENT_ID>.

SELECT
fcs.event_ts AS score_date
, dp.institution_person_id
, fcs.civ_career
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
WHERE
dp.institution_person_id = '<STUDENT_ID>'
ORDER BY dp.institution_person_id, fcs.event_ts DESC;

Student Persistence Score Shift

This query returns the latest persistence (continuation) score for each student and career (academic level), and calculates the one day and one week shift in this score so that the customer can identify students whose score has changed recently. This query can be adapted to calculate score shifts other than one day or week.

The query also displays the Civitas defined bucket for the student's latest persistence score using the default ranges for these buckets: Very Low, Low, Moderate, High, Very High

-- Find the latest persistence score for each student and career.
WITH latest_continuation_score AS (
SELECT
fcs.dim_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.event_ts
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
WHERE
-- This step ensures that the query returns the latest score for each student.
fcs.event_ts = (SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores fcs1
WHERE fcs.dim_person_id = fcs1.dim_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id)
-- This step ensures that the query returns scores for currently enrolled students only.
-- Only currently enrolled students will have a score on the latest day.
AND fcs.event_ts = (SELECT MAX(fcs2.event_ts)
FROM data_mart.fact_continuation_scores fcs2)
)
, continuation_score_shift AS (
-- Change the '7 days' or '1 day' thresholds below to use a different window for the score shift.
SELECT
lcs.dim_person_id
, lcs.civ_career
, lcs.civ_term_id
, lcs.event_ts
, lcs.prediction AS current_prediction
, fcs_shift_7.prediction AS previous_prediction_7_days
, fcs_shift_1.prediction AS previous_prediction_1_day
FROM latest_continuation_score lcs
LEFT JOIN data_mart.fact_continuation_scores fcs_shift_7 ON
lcs.dim_person_id = fcs_shift_7.dim_person_id
AND lcs.civ_career = fcs_shift_7.civ_career
AND lcs.civ_term_id = fcs_shift_7.civ_term_id
AND (lcs.event_ts - INTERVAL '7 days') = fcs_shift_7.event_ts
LEFT JOIN data_mart.fact_continuation_scores fcs_shift_1 ON
lcs.dim_person_id = fcs_shift_1.dim_person_id
AND lcs.civ_career = fcs_shift_1.civ_career
AND lcs.civ_term_id = fcs_shift_1.civ_term_id
AND (lcs.event_ts - INTERVAL '1 day') = fcs_shift_1.event_ts
)
SELECT
dp.institution_person_id
, css.civ_career AS civitas_career_id
, css.civ_term_id
, css.event_ts::DATE AS event_date
-- Note, these are the default ranges. They may have been adjusted for your institution.
, CASE
WHEN css.current_prediction >= 0.90 THEN 'Very High'
WHEN css.current_prediction >= 0.70 THEN 'High'
WHEN css.current_prediction >= 0.50 THEN 'Moderate'
WHEN css.current_prediction >= 0.20 THEN 'Low'
WHEN css.current_prediction < 0.20 THEN 'Very Low'
ELSE NULL
END AS persistence_likelihood -- Persistence Likelihood (Very High, High, Moderate, Low, Very Low)
, css.current_prediction AS persistence_prediction_score -- Persistence Prediction Score %
, css.current_prediction - css.previous_prediction_7_days AS weekly_prediction_score_shift -- Weekly Change in Persistence Score
, css.current_prediction - css.previous_prediction_1_day AS daily_prediction_score_shift --Daily Change in Persistence Score
FROM data_mart.dim_person dp
INNER JOIN continuation_score_shift css ON
dp.dim_person_id = css.dim_person_id
WHERE
CURRENT_DATE >= dp.raw_eff_ts
AND CURRENT_DATE < dp.raw_eff_ts_end

Student Continuation Prediction by Age

This query calculates the average prediction score of students across different age buckets and counts the number of students in each bucket, based on the most recent persistence score date.

WITH latest_continuation_score AS (
SELECT
fcs1.dim_person_id
, fcs1.civ_career
, fcs1.event_ts
, fcs1.prediction
, fcs1.dim_date_id
, fcs1.civ_term_id
FROM data_mart.fact_continuation_scores fcs1
WHERE fcs1.event_ts = (SELECT MAX(fcs2.event_ts)
FROM data_mart.fact_continuation_scores fcs2)
)
, student_ages AS (
SELECT DISTINCT
dp.institution_person_id
, dp.institution_id
, dp.dim_person_id
, DATEDIFF(YEAR, dp.raw_birth_dt::DATE, CURRENT_DATE)
- (CASE -- Account for birthdays that have not occurred yet this year
WHEN TO_CHAR(CURRENT_DATE, 'MMDD') < TO_CHAR(dp.raw_birth_dt::DATE, 'MMDD')
THEN 1 ELSE 0
END) AS age
FROM data_mart.dim_person dp
WHERE age IS NOT NULL
)
, age_buckets AS (
SELECT
sa.institution_person_id,
sa.dim_person_id,
sa.institution_id,
fcs.prediction,
CASE
WHEN sa.age <= 18 THEN '18_and_under'
WHEN sa.age = 19 THEN '19'
WHEN sa.age = 20 THEN '20'
WHEN sa.age = 21 THEN '21'
WHEN sa.age = 22 THEN '22'
WHEN sa.age = 23 THEN '23'
WHEN sa.age = 24 THEN '24'
WHEN sa.age BETWEEN 25 AND 29 THEN '25 to 29'
WHEN sa.age BETWEEN 30 AND 34 THEN '30 to 34'
WHEN sa.age BETWEEN 35 AND 39 THEN '35 to 39'
ELSE '40_and_over'
END AS age_bucket
FROM student_ages sa
INNER JOIN data_mart.fact_continuation_scores fcs ON
sa.dim_person_id = fcs.dim_person_id
AND sa.institution_id = fcs.institution_id
INNER JOIN latest_continuation_score lcs ON
lcs.dim_person_id = fcs.dim_person_id
)
SELECT
ab.age_bucket,
COUNT(DISTINCT ab.institution_person_id) AS student_count,
AVG(ab.prediction) AS avg_prediction
FROM age_buckets ab
GROUP BY ab.age_bucket
ORDER BY
CASE ab.age_bucket
WHEN '18_and_under' THEN 1
WHEN '19' THEN 2
WHEN '20' THEN 3
WHEN '21' THEN 4
WHEN '22' THEN 5
WHEN '23' THEN 6
WHEN '24' THEN 7
WHEN '25 to 29' THEN 8
WHEN '30 to 34' THEN 9
WHEN '35 to 39' THEN 10
WHEN '40_and_over' THEN 11
END;

Dropped Students with Low Persistence Scores

This query aims to provide a list of students who have recently dropped at least one course in the active term and have a low continuation score. We are considering low prediction scores with values below 0.5. Users are able to adjust the threshold as desired.

WITH latest_continuation_score AS (
SELECT
fcs.dim_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.event_ts
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
WHERE fcs.event_ts = (
SELECT
MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores fcs1
WHERE
fcs.dim_person_id = fcs1.dim_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id
) -- This step ensure that the query returns the latest score for each student.
AND fcs.prediction < 0.5 -- Adjust persistence score threshold as desired here.
)
SELECT DISTINCT
dp.raw_person_id AS person_id
, ds.civ_term_id
, de.raw_drop_dt
, de.raw_drop_dt - de.raw_add_dt AS days_enrolled
, ds.civ_course_id
, ds.raw_title AS course_title
, lcs.prediction
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_person dp ON
dp.dim_person_id = fe.dim_person_id
INNER JOIN data_mart.dim_section ds ON
fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_enrollment de ON
fe.dim_enrollment_id = de.dim_enrollment_id
AND de.raw_person_id = dp.raw_person_id
INNER JOIN data_mart.dim_career_term dct ON
dct.civ_term_id = ds.civ_term_id
INNER JOIN latest_continuation_score lcs ON
lcs.dim_person_id = dp.dim_person_id
AND lcs.civ_term_id = ds.civ_term_id
WHERE
de.raw_drop_dt >= dct.civ_career_term_start_dt
AND de.raw_drop_dt <= dct.civ_career_term_end_dt
AND dct.is_active = 1
ORDER BY
de.raw_drop_dt DESC
, dp.institution_person_id;

Average Persistence Score by Term

This query calculates the average persistence score for each term. It also includes the average score by course. Uncomment the WHERE clause filters to limit the results to specific campuses, institutions, departments, subjects, or catalog numbers as needed.

WITH score_per_student_per_term AS (
SELECT
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
WHERE fcs.event_ts = (
SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores fcs1
INNER JOIN data_mart.dim_person dp1 ON
fcs1.dim_person_id = dp1.dim_person_id
WHERE dp.raw_person_id = dp1.raw_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id
)
)
, average_score_per_course AS (
SELECT
ds.civ_course_id
, de.civ_career_id
, AVG(spspt.prediction) AS total_average_score
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de ON
fe.dim_enrollment_id = de.dim_enrollment_id
INNER JOIN data_mart.dim_section ds ON
fe.dim_section_id = ds.dim_section_id
INNER JOIN score_per_student_per_term spspt ON
de.raw_person_id = spspt.raw_person_id
AND ds.civ_term_id = spspt.civ_term_id
AND de.civ_career_id = spspt.civ_career
GROUP BY
ds.civ_course_id
, de.civ_career_id
)
SELECT
ds.civ_course_id
, ds.raw_title AS course_title
, dct.civ_term_id
, ac.total_average_score AS all_time_average_score
, AVG(spspt.prediction) AS average_score_by_term
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de ON
fe.dim_enrollment_id = de.dim_enrollment_id
INNER JOIN data_mart.dim_section ds ON
fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_course dc ON
ds.raw_course_id = dc.raw_course_id
INNER JOIN data_mart.dim_career_term dct ON
ds.civ_term_id = dct.civ_term_id
AND de.civ_career_id = dct.civ_career_id
INNER JOIN score_per_student_per_term spspt ON
de.raw_person_id = spspt.raw_person_id
AND ds.civ_term_id = spspt.civ_term_id
AND de.civ_career_id = spspt.civ_career
INNER JOIN average_score_per_course ac ON
ds.civ_course_id = ac.civ_course_id
AND de.civ_career_id = ac.civ_career_id
WHERE 1 = 1
--AND dct.is_active = 1
--AND ds.raw_campus_id = '<CAMPUS_ID>'
--AND ds.raw_institution_id = '<INSTITUTION_ID>'
--AND ds.raw_department_id = '<DEPARTMENT_ID>'
--AND dc.raw_subject = '<SUBJECT>'
--AND dc.raw_catalog_nbr = '<CATALOG_NBR>'
GROUP BY
ds.civ_course_id
, ds.raw_title
, dct.civ_term_id
, ac.total_average_score
ORDER BY
ds.civ_course_id DESC
, dct.civ_term_id ASC;

High GPA Students with low persistence scores

-- This query identifies students who are currently enrolled (active) and have: High GPA (3.0 or higher) Low persistence/continuation score (below 50%)

WITH latest_continuation_score AS (
SELECT
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
WHERE fcs.event_ts = (
SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores fcs1
INNER JOIN data_mart.dim_person dp1 ON
fcs1.dim_person_id = dp1.dim_person_id
WHERE
dp.raw_person_id = dp1.raw_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id
)
AND fcs.prediction < 0.5
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
ORDER BY
fcs.event_ts DESC
, fcs.dim_person_id DESC
) = 1
)
, cte_gpa AS (
SELECT
dp.raw_person_id
, AVG(fpg.raw_gpa) AS avg_gpa
FROM data_mart.fact_provided_gpa fpg
INNER JOIN data_mart.dim_person dp ON
fpg.dim_person_id = dp.dim_person_id
GROUP BY dp.raw_person_id, fpg.event_ts
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
ORDER BY
fpg.event_ts DESC
) = 1
)
, active_students AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
, de.institution_id
, dct.civ_term_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
ds.raw_term_id = dct.raw_term_id
AND de.civ_career_id = dct.civ_career_id
WHERE dct.is_active = 1 -- Active terms only
AND de.raw_career_id <> 'Not Available'
AND de.civ_is_enrolled = 1 -- Only enrolled students
)
SELECT
dp.raw_person_id
, lcs.civ_career
, lcs.civ_term_id
, lcs.prediction AS continuation_score
, ROUND(cte_gpa.avg_gpa, 2) AS avg_gpa
FROM latest_continuation_score lcs
INNER JOIN data_mart.dim_person dp ON
lcs.raw_person_id = dp.raw_person_id
AND dp.raw_eff_ts_end > CURRENT_DATE -- Current/active records only
INNER JOIN cte_gpa ON
dp.raw_person_id = cte_gpa.raw_person_id
INNER JOIN active_students acs ON
dp.raw_person_id = acs.raw_person_id
AND lcs.civ_career = acs.civ_career_id
AND dp.institution_id = acs.institution_id
AND lcs.civ_term_id = acs.civ_term_id
WHERE cte_gpa.avg_gpa >= 3.0
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
ORDER BY dp.raw_eff_ts DESC
) = 1
ORDER BY
cte_gpa.avg_gpa DESC
, lcs.prediction ASC
, dp.raw_person_id ASC;

Student Continuation Prediction by Location

This query calculates the Student Continuation Prediction by Location. We will calculate the Continuation Prediction score by city, state, state_flag and zip_code

You can specify an State in which will generate a flag that will specify if the student is In State or Out of State

-- This state is the state that will indicate if the student is 'In State' or 'Out of State'.
WITH institution_state_variable AS (
SELECT 'MA' AS institution_state_value
)
, latest_continuation_score AS (
SELECT
dp.dim_person_id
, dp.institution_person_id
, fcs.civ_term_id
, dp.primary_state
, dp.primary_city
, dp.primary_zip
, fcs.event_ts
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp
ON fcs.dim_person_id = dp.dim_person_id
WHERE fcs.event_ts = (
SELECT MAX(fcs.event_ts)
FROM data_mart.fact_continuation_scores fcs
WHERE fcs.dim_person_id = fcs.dim_person_id
)
)
, continuation_score_by_location AS (
SELECT
lcs.institution_person_id
, lcs.primary_state
, lcs.primary_city
, lcs.primary_zip
, CASE
WHEN lcs.primary_state = (SELECT institution_state_value FROM institution_state_variable) THEN 'In State'
ELSE 'Out of State'
END AS state_flag
, AVG(lcs.prediction) AS student_persistence_prediction_score
, AVG(lcs.prediction) OVER (PARTITION BY lcs.primary_state) AS avg_persistence_prediction_by_state
, AVG(lcs.prediction) OVER (PARTITION BY state_flag) AS avg_persistence_prediction_by_in_state
, AVG(lcs.prediction) OVER (PARTITION BY lcs.primary_city) AS avg_persistence_prediction_by_city
, AVG(lcs.prediction) OVER (PARTITION BY lcs.primary_zip) AS avg_persistence_prediction_by_zip
, CASE
WHEN lcs.prediction >= 0.90 THEN 'Very High'
WHEN lcs.prediction >= 0.70 THEN 'High'
WHEN lcs.prediction >= 0.50 THEN 'Moderate'
WHEN lcs.prediction >= 0.20 THEN 'Low'
ELSE 'Very Low'
END AS student_completion_likelihood_category
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_person dp ON
dp.dim_person_id = fe.dim_person_id
INNER JOIN data_mart.dim_section ds ON
fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_career_term dct ON
dct.civ_term_id = ds.civ_term_id
INNER JOIN latest_continuation_score lcs ON
lcs.dim_person_id = dp.dim_person_id
AND lcs.civ_term_id = ds.civ_term_id
WHERE
CURRENT_DATE >= dp.raw_eff_ts
AND CURRENT_DATE < dp.raw_eff_ts_end
-- AND dct.is_active = 1 -- Students from active term only
GROUP BY
lcs.institution_person_id
, lcs.primary_state
, lcs.primary_city
, lcs.primary_zip
, lcs.prediction
)
SELECT
institution_person_id
, primary_state
, state_flag
, primary_city
, primary_zip
, student_persistence_prediction_score
, student_completion_likelihood_category
, avg_persistence_prediction_by_state
, avg_persistence_prediction_by_city
, avg_persistence_prediction_by_zip
FROM continuation_score_by_location
-- WHERE
-- institution_person_id = '<DESIRED_STUDENT_ID>'
-- AND primary_state = '<DESIRED_STATE>'
-- AND state_flag = 'In State'
-- AND primary_zip = '<DESIRED_ZIP>'
-- AND primary_city = '<DESIRED_CITY>'
-- AND student_persistence_prediction_score >= '<PERSISTENCE_PREDICTION>'

Besides that you can change the final select statement and get more details about the continuation prediction scores of students from a specific city, state, zip_code or in_state group.

-- Persistence prediction by state
SELECT
primary_state
, COUNT(*)
, MAX(avg_persistence_prediction_by_state) as avg_persistence_prediction_by_state
FROM continuation_score_by_location
WHERE primary_state = '<DESIRED_STATE>'
GROUP BY primary_state;
-- Persistence prediction by In State or Out of State flag
SELECT
state_flag
, COUNT(*)
, MAX(avg_persistence_prediction_by_in_state) as avg_persistence_prediction_by_in_state_flag
FROM continuation_score_by_location
GROUP BY state_flag;
-- Persistence prediction by zip code
SELECT
primary_zip
, COUNT(*)
, MAX(avg_persistence_prediction_by_zip) as avg_persistence_prediction_by_zip
FROM continuation_score_by_location
WHERE primary_zip = '<DESIRED_ZIP>'
GROUP BY primary_zip;

Single Student Ranked Features

important

Individual feature ranked (IFR) functionality is not included with standard warehouse deployments. If you would like to incorporate IFR, please inquire with Sales.

This query returns the Civitas model features, ranked by importance for the given student.

Specifying <STUDENT_ID> returns the individual feature ranked (IFR) features for the student, ranked by importance. The most important feature has a rank of 1.

Note: This query returns only features for which an IFR value was calculated.

SELECT
fcfl.event_ts AS score_date
, dp.institution_person_id
, dcf.civ_continuation_feature_id
, dcf.civ_continuation_feature_name
, dcf.civ_continuation_feature_description
, fcfl.text_value
, fcfl.int_value
, fcfl.float_value
, fcfl.bool_value
, fci.feature_rank -- Most important will be ranked 1
FROM data_mart.fact_continuation_feature_latest fcfl
INNER JOIN data_mart.dim_continuation_feature dcf USING (dim_continuation_feature_id, institution_id)
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.fact_continuation_ifr fci USING (event_ts, dim_continuation_feature_id, dim_person_id, dim_date_id, institution_id)
WHERE
dp.institution_person_id = '<STUDENT_ID>'
ORDER BY dcf.civ_continuation_feature_id;

Top and Bottom Percentile Section LMS Features Values

Identifies students' posting frequency, login activity, and academic performance relative to their section peers using z-score percentiles to flag at-risk behaviors. Provides engagement metrics including bottom/top 20% posting frequency and bottom quartile grades.

WITH feature_values_pivot AS (
SELECT *
FROM
(SELECT
fcfl.dim_person_id
, fcfl.civ_career_id
, dcf.civ_continuation_feature_id
, fcfl.float_value
FROM data_mart.fact_continuation_feature_latest fcfl
INNER JOIN data_mart.dim_continuation_feature dcf ON
fcfl.dim_continuation_feature_id = dcf.dim_continuation_feature_id
)
PIVOT (MAX(float_value) FOR civ_continuation_feature_id
-- Add any other feature you would like to include to the list below
-- bring it into the final query via the fvp CTE.
IN (
'avg_cnt_post_sum'
, 'avg_relative_avg_cnt_attendance_sum'
, 'zscore_cnt_post_sum_section_avg'
, 'avg_term_grade_percentage_cum'
, 'avg_relative_avg_section_grade_percentage_cum'
)
)
)
SELECT
dp.institution_person_id
, ds.raw_section_id
, ds.raw_course_id
, ds.raw_title
, fvp.avg_cnt_post_sum -- Posting Frequency Status
, fvp.avg_relative_avg_cnt_attendance_sum -- Login Frequency Status
, ds.raw_term_id -- Term Code
-- This is based on z-score percentiles for a normal distribution:
-- https://www.pindling.org/Math/Learning/Statistics/z_scores_table.htm
, ds.raw_start_dt
, ds.raw_end_dt
-- Class grade is in bottom quartile
, CASE
-- 20th percentile
WHEN fvp.zscore_cnt_post_sum_section_avg::FLOAT <= -0.842
THEN 1
ELSE 0
-- Posting Frequency Relative to Section (Bottom 20%,)
END AS posting_is_bottom_20_percent
, CASE
-- 80th percentile
WHEN fvp.zscore_cnt_post_sum_section_avg::FLOAT >= 0.842
THEN 1
ELSE 0
-- Posting frequency relative to section (Top 20%)
END AS posting_is_top_20_percent
, CASE
-- 25th percentile
WHEN fvp.avg_relative_avg_section_grade_percentage_cum::FLOAT
<= -0.674
THEN 1
ELSE 0
END AS grade_is_bottom_quartile
FROM data_mart.dim_person dp
INNER JOIN data_mart.fact_enroll fe ON
dp.dim_person_id = fe.dim_person_id
INNER JOIN data_mart.dim_enrollment de ON
fe.dim_enrollment_id = de.dim_enrollment_id
INNER JOIN data_mart.dim_section ds ON
fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_course dc ON
ds.raw_course_id = dc.raw_course_id
INNER JOIN feature_values_pivot fvp ON
dp.dim_person_id = fvp.dim_person_id
AND de.civ_career_id = fvp.civ_career_id
INNER JOIN data_mart.dim_career_term dct ON
de.raw_career_id = dct.raw_career_id
AND ds.raw_term_id = dct.raw_term_id
WHERE CURRENT_DATE >= dp.raw_eff_ts
AND CURRENT_DATE < dp.raw_eff_ts_end
AND dct.is_active = 1 -- Remove this line to include all terms
ORDER BY
dp.institution_person_id ASC
, ds.raw_section_id ASC
, ds.raw_course_id ASC
, ds.raw_term_id DESC;

Student Completion Predictions

Specifying '<STUDENT_ID>' returns the latest 4-year completion prediction for the student. Enrollment flag and actual completion outcome can also be obtained by joining with other tables in the data warehouse.

-- Find the terms a student has an active enrollment in for each of their careers (academic levels).
WITH student_enrollment AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id AS civ_career
, ds.civ_term_id
, de.civ_is_enrolled
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_enrollment de USING (dim_enrollment_id)
INNER JOIN data_mart.dim_section ds USING (dim_section_id)
WHERE
de.dim_enrollment_id != 'null_row'
AND ds.dim_section_id != 'null_row'
AND de.civ_is_enrolled = 1
)
-- Find the earliest term the student has graduated in for each of their careers (academic levels).
, earliest_completion_term AS (
SELECT
dp.raw_person_id
, dct.civ_career_id AS civ_career
, MIN(dct.civ_career_term_id) AS completion_career_term_id
FROM data_mart.fact_graduation fg
INNER JOIN data_mart.dim_person dp USING (dim_person_id)
INNER JOIN data_mart.dim_career_term dct USING (dim_career_term_id)
WHERE
fg.civ_is_award_conferred = 1
GROUP BY dp.raw_person_id, dct.civ_career_id
)
SELECT
fcsl.event_ts AS score_date
, fcsl.civ_term_id AS score_term
, fcsl.civ_career
, dp.institution_person_id
, fcsl.prediction
, COALESCE(se.civ_is_enrolled, 0) AS civ_is_enrolled
, CASE
WHEN ect.completion_career_term_id <= dcw.civ_end_term_sequence_no
THEN 1
ELSE 0
END AS civ_is_completed
FROM data_mart.fact_completion_scores_latest fcsl
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
INNER JOIN data_mart.dim_completion_window dcw USING (dim_completion_window_id, institution_id, civ_career)
LEFT JOIN student_enrollment se USING (raw_person_id, civ_career, civ_term_id)
LEFT JOIN earliest_completion_term ect USING (raw_person_id, civ_career)
WHERE
fcsl.window_length = 4 -- Adjust this for the graduation window you would like to use (4 year, 5 year, etc.). To see your configured windows: SELECT DISTINCT window_length FROM data_mart.dim_completion_window;
AND dp.institution_person_id = '<STUDENT_ID>';

Active Student Subquery

Use the subquery below to filter your results, returning only active or currently enrolled students for the current term.

WHERE dp.raw_person_id IN (
SELECT DISTINCT
de.raw_person_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
dct.raw_term_id = ds.raw_term_id
WHERE dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND e.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1)

See example below for how you would use the subquery in the 'Student Continuation Predictions' example.

SELECT
fcs.event_ts AS score_date
, dp.institution_person_id
, fcs.civ_career
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
WHERE dp.raw_person_id IN (
SELECT DISTINCT
de.raw_person_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
dct.raw_term_id = ds.raw_term_id
WHERE dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND e.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1)
ORDER BY dp.institution_person_id, fcs.event_ts DESC;

Average Persistence or Continuation Score by GPA Band

This query calculates the average persistence prediction by GPA Band and categorizes the persistence likelihood into five levels: Very High, High, Moderate, Low, and Very Low.

It also provides the count of students in each GPA Band used for the aggregated predictions.

You can modify the GPA ranges in the gpa_data CASE statement as per your requirements.

-- Get the latest persistence prediction for each student
WITH latest_prediction_per_student AS (
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcs.prediction AS latest_prediction
, de.civ_career_id
FROM
data_mart.fact_continuation_scores AS fcs
INNER JOIN
data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
INNER JOIN data_mart.dim_enrollment AS de
ON dp.raw_person_id = de.raw_person_id
WHERE de.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
QUALIFY
-- Keep only the latest prediction
ROW_NUMBER() OVER (
PARTITION BY
dp.institution_person_id
ORDER BY
fcs.event_ts DESC
) = 1
)
, gpa_data AS (
-- Map students to GPA bands based on their raw GPA values
SELECT
dp.institution_person_id
, CASE
WHEN fpg.raw_gpa >= 0.00 AND fpg.raw_gpa < 0.50 THEN '0.0-0.5'
WHEN fpg.raw_gpa >= 0.50 AND fpg.raw_gpa < 1.00 THEN '0.5-1.0'
WHEN fpg.raw_gpa >= 1.00 AND fpg.raw_gpa < 1.50 THEN '1.0-1.5'
WHEN fpg.raw_gpa >= 1.50 AND fpg.raw_gpa < 2.00 THEN '1.5-2.0'
WHEN fpg.raw_gpa >= 2.00 AND fpg.raw_gpa < 2.50 THEN '2.0-2.5'
WHEN fpg.raw_gpa >= 2.50 AND fpg.raw_gpa < 3.00 THEN '2.5-3.0'
WHEN fpg.raw_gpa >= 3.00 AND fpg.raw_gpa < 3.50 THEN '3.0-3.5'
WHEN fpg.raw_gpa >= 3.50 AND fpg.raw_gpa < 4.00 THEN '3.5-4.0'
WHEN fpg.raw_gpa >= 4.00 THEN '>= 4.0'
END AS gpa_band
FROM
data_mart.dim_person AS dp
INNER JOIN
data_mart.fact_provided_gpa AS fpg
ON dp.dim_person_id = fpg.dim_person_id
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.institution_person_id
ORDER BY
fpg.event_ts DESC
) = 1
)
, plan_stats AS (
-- Calculate average persistence prediction and student count per GPA band
SELECT
gd.gpa_band
, lpps.civ_career_id
, AVG(lpps.latest_prediction) AS avg_prediction_per_band -- Average persistence prediction
, COUNT(DISTINCT gd.institution_person_id) AS student_count -- Count of distinct students
FROM
latest_prediction_per_student AS lpps
INNER JOIN
gpa_data AS gd
ON lpps.institution_person_id = gd.institution_person_id
WHERE
lpps.raw_person_id IN (
-- Filter for active enrollments
SELECT DISTINCT de.raw_person_id
FROM
data_mart.dim_enrollment AS de
INNER JOIN
data_mart.dim_section AS ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN
data_mart.dim_career_term AS dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1 -- Only active terms
AND de.raw_career_id <> 'Not Available' -- Exclude invalid career IDs
AND de.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1 -- Only currently enrolled students
)
GROUP BY
gd.gpa_band, lpps.civ_career_id
ORDER BY
gd.gpa_band, lpps.civ_career_id
)
SELECT
ps.civ_career_id
, ps.gpa_band
, student_count
, TO_CHAR((ps.avg_prediction_per_band) * 100, 'FM999.00')
|| '%' AS avg_prediction
, CASE -- Categorize persistence likelihood
WHEN ps.avg_prediction_per_band >= 0.90 THEN 'Very High'
WHEN ps.avg_prediction_per_band >= 0.70 THEN 'High'
WHEN ps.avg_prediction_per_band >= 0.50 THEN 'Moderate'
WHEN ps.avg_prediction_per_band >= 0.20 THEN 'Low'
WHEN ps.avg_prediction_per_band < 0.20 THEN 'Very Low'
END AS persistence_likelihood
FROM
plan_stats AS ps
ORDER BY
ps.gpa_band DESC, ps.civ_career_id;

Percentile Binned Students by Persistence Score

This query categorizes active students into 10 percentile groups (deciles) based on their latest persistence (continuation) score prediction.

Notes:

  • Percentiles are based only on currently active students.
  • The latest persistence score is used for each student.
-- Get the latest persistence prediction for each student
WITH latest_prediction_per_student AS (
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcs.prediction AS latest_prediction
FROM data_mart.fact_continuation_scores AS fcs
INNER JOIN data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
dp.institution_person_id
ORDER BY
fcs.event_ts DESC
) = 1
)
, active_students AS (
-- Filter for active enrollments
SELECT DISTINCT de.raw_person_id
FROM data_mart.dim_enrollment AS de
INNER JOIN data_mart.dim_section AS ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term AS dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND de.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1
)
, percentile_binned_students AS (
-- Assign each student into one of 10 bins based on persistence score
SELECT
lpps.institution_person_id
, lpps.latest_prediction
, NTILE(10) OVER (
ORDER BY
lpps.latest_prediction
) AS percentile_group
FROM latest_prediction_per_student AS lpps
INNER JOIN active_students AS ast
ON lpps.raw_person_id = ast.raw_person_id
)
-- Final output with bin label
SELECT
pbs.institution_person_id
, TO_CHAR(pbs.latest_prediction * 100, 'FM999.00') || '%' AS student_persistence_score
, CASE percentile_group
WHEN 1 THEN '0-10th Percentile' -- Lowest scoring group
WHEN 2 THEN '10-20th Percentile'
WHEN 3 THEN '20-30th Percentile'
WHEN 4 THEN '30-40th Percentile'
WHEN 5 THEN '40-50th Percentile'
WHEN 6 THEN '50-60th Percentile'
WHEN 7 THEN '60-70th Percentile'
WHEN 8 THEN '70-80th Percentile'
WHEN 9 THEN '80-90th Percentile'
WHEN 10 THEN '90-100th Percentile' -- Highest scoring group
END AS percentile_bin
FROM percentile_binned_students AS pbs
ORDER BY
percentile_group, latest_prediction;

Average Continuation Score by Origin

This query calculates the average continuation prediction score for Domestic, International, and Unknown students. It uses the latest prediction for each student and only includes students with active enrollments.

WITH latest_prediction_per_student AS (
-- Get the latest persistence prediction for each student
SELECT
dp.raw_person_id
, dp.institution_person_id
, dp.raw_country_of_origin
, fcs.prediction AS latest_prediction
, fcs.civ_career
FROM data_mart.fact_continuation_scores AS fcs
INNER JOIN data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
dp.institution_person_id
ORDER BY
fcs.event_ts DESC
) = 1
)
, active_students AS (
-- Filter for students with active enrollments
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
FROM data_mart.dim_enrollment AS de
INNER JOIN data_mart.dim_section AS ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term AS dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND de.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1
)
-- Classify students and calculate averages
SELECT
CASE
WHEN lpps.raw_country_of_origin IN (
'American', 'USA', 'United States', 'US', 'U.S.A.', 'U.S.', 'America'
) THEN 'Domestic'
WHEN lpps.raw_country_of_origin IN (
'Not Applicable'
, 'Not Available'
, 'Unknown'
, 'N/A'
, 'NA'
, 'None'
, 'Unspecified'
, ''
) OR lpps.raw_country_of_origin IS NULL THEN 'Unknown'
ELSE 'International'
END AS country_of_origin
, AVG(lpps.latest_prediction) AS avg_persistence_score
, COUNT(*) AS student_count
FROM latest_prediction_per_student AS lpps
INNER JOIN active_students AS acts
ON lpps.raw_person_id = acts.raw_person_id
AND lpps.civ_career = acts.civ_career_id
GROUP BY
country_of_origin
ORDER BY
country_of_origin;

Utility Queries