Persistence Prediction Queries

High GPA Students with Low Persistence Scores

-- This query identifies students who are currently enrolled (active) and have: High GPA (3.0 or higher) Low persistence score (below 50%)

WITH latest_continuation_score AS (
SELECT
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp ON
fcs.dim_person_id = dp.dim_person_id
WHERE fcs.event_ts = (
SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores fcs1
INNER JOIN data_mart.dim_person dp1 ON
fcs1.dim_person_id = dp1.dim_person_id
WHERE
dp.raw_person_id = dp1.raw_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id
)
AND fcs.prediction < 0.5
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
, fcs.civ_career
, fcs.civ_term_id
ORDER BY
fcs.event_ts DESC
, fcs.dim_person_id DESC
) = 1
)
, cte_gpa AS (
SELECT
dp.raw_person_id
, AVG(fpg.raw_gpa) AS avg_gpa
FROM data_mart.fact_provided_gpa fpg
INNER JOIN data_mart.dim_person dp ON
fpg.dim_person_id = dp.dim_person_id
GROUP BY dp.raw_person_id, fpg.event_ts
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
ORDER BY
fpg.event_ts DESC
) = 1
)
, active_students AS (
SELECT DISTINCT
de.raw_person_id
, de.civ_career_id
, de.institution_id
, dct.civ_term_id
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds ON
de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct ON
ds.raw_term_id = dct.raw_term_id
AND de.civ_career_id = dct.civ_career_id
WHERE dct.is_active = 1 -- Active terms only
AND de.raw_career_id <> 'Not Available'
AND de.civ_is_enrolled = 1 -- Only enrolled students
)
SELECT
dp.raw_person_id
, lcs.civ_career
, lcs.civ_term_id
, lcs.prediction AS continuation_score
, ROUND(cte_gpa.avg_gpa, 2) AS avg_gpa
FROM latest_continuation_score lcs
INNER JOIN data_mart.dim_person dp ON
lcs.raw_person_id = dp.raw_person_id
AND dp.raw_eff_ts_end > CURRENT_DATE -- Current/active records only
INNER JOIN cte_gpa ON
dp.raw_person_id = cte_gpa.raw_person_id
INNER JOIN active_students acs ON
dp.raw_person_id = acs.raw_person_id
AND lcs.civ_career = acs.civ_career_id
AND dp.institution_id = acs.institution_id
AND lcs.civ_term_id = acs.civ_term_id
WHERE cte_gpa.avg_gpa >= 3.0
QUALIFY
ROW_NUMBER() OVER (
PARTITION BY
dp.raw_person_id
ORDER BY dp.raw_eff_ts DESC
) = 1
ORDER BY
cte_gpa.avg_gpa DESC
, lcs.prediction ASC
, dp.raw_person_id ASC;

Percentile Binned Students by Persistence Score

This query categorizes active students into 10 percentile groups (deciles) based on their latest persistence score prediction.

Notes:

  • Percentiles are based only on currently active students.
  • The latest persistence score is used for each student.
-- Get the latest persistence prediction for each student
WITH latest_prediction_per_student AS (
SELECT
dp.raw_person_id
, dp.institution_person_id
, fcs.prediction AS latest_prediction
FROM data_mart.fact_continuation_scores AS fcs
INNER JOIN data_mart.dim_person AS dp
ON fcs.dim_person_id = dp.dim_person_id
QUALIFY ROW_NUMBER() OVER (
PARTITION BY
dp.institution_person_id
ORDER BY
fcs.event_ts DESC
) = 1
)
, active_students AS (
-- Filter for active enrollments
SELECT DISTINCT de.raw_person_id
FROM data_mart.dim_enrollment AS de
INNER JOIN data_mart.dim_section AS ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term AS dct
ON ds.raw_term_id = dct.raw_term_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND de.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1
)
, percentile_binned_students AS (
-- Assign each student into one of 10 bins based on persistence score
SELECT
lpps.institution_person_id
, lpps.latest_prediction
, NTILE(10) OVER (
ORDER BY
lpps.latest_prediction
) AS percentile_group
FROM latest_prediction_per_student AS lpps
INNER JOIN active_students AS ast
ON lpps.raw_person_id = ast.raw_person_id
)
-- Final output with bin label
SELECT
pbs.institution_person_id
, TO_CHAR(pbs.latest_prediction * 100, 'FM999.00') || '%' AS student_persistence_score
, CASE percentile_group
WHEN 1 THEN '0-10th Percentile' -- Lowest scoring group
WHEN 2 THEN '10-20th Percentile'
WHEN 3 THEN '20-30th Percentile'
WHEN 4 THEN '30-40th Percentile'
WHEN 5 THEN '40-50th Percentile'
WHEN 6 THEN '50-60th Percentile'
WHEN 7 THEN '60-70th Percentile'
WHEN 8 THEN '70-80th Percentile'
WHEN 9 THEN '80-90th Percentile'
WHEN 10 THEN '90-100th Percentile' -- Highest scoring group
END AS percentile_bin
FROM percentile_binned_students AS pbs
ORDER BY
percentile_group, latest_prediction;

Student Persistence Predictions

This query fetches all historical persistence prediction scores, the institution ID, and the date the score was generated for desired <STUDENT_ID>.

SELECT
fcs.event_ts AS score_date
, dp.institution_person_id
, fcs.civ_career
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp USING (dim_person_id, institution_id)
WHERE
dp.institution_person_id = '<STUDENT_ID>'
ORDER BY dp.institution_person_id, fcs.event_ts DESC;

Student Persistence Prediction by Age

This query calculates the average prediction score of students across different age buckets and counts the number of students in each bucket, based on the most recent persistence score date.

WITH latest_continuation_score AS (
SELECT
fcs1.dim_person_id
, fcs1.civ_career
, fcs1.event_ts
, fcs1.prediction
, fcs1.dim_date_id
, fcs1.civ_term_id
FROM data_mart.fact_continuation_scores fcs1
WHERE fcs1.event_ts = (SELECT MAX(fcs2.event_ts)
FROM data_mart.fact_continuation_scores fcs2)
)
, student_ages AS (
SELECT DISTINCT
dp.institution_person_id
, dp.institution_id
, dp.dim_person_id
, DATEDIFF(YEAR, dp.raw_birth_dt::DATE, CURRENT_DATE)
- (CASE -- Account for birthdays that have not occurred yet this year
WHEN TO_CHAR(CURRENT_DATE, 'MMDD') < TO_CHAR(dp.raw_birth_dt::DATE, 'MMDD')
THEN 1 ELSE 0
END) AS age
FROM data_mart.dim_person dp
WHERE age IS NOT NULL
)
, age_buckets AS (
SELECT
sa.institution_person_id,
sa.dim_person_id,
sa.institution_id,
fcs.prediction,
CASE
WHEN sa.age <= 18 THEN '18_and_under'
WHEN sa.age = 19 THEN '19'
WHEN sa.age = 20 THEN '20'
WHEN sa.age = 21 THEN '21'
WHEN sa.age = 22 THEN '22'
WHEN sa.age = 23 THEN '23'
WHEN sa.age = 24 THEN '24'
WHEN sa.age BETWEEN 25 AND 29 THEN '25 to 29'
WHEN sa.age BETWEEN 30 AND 34 THEN '30 to 34'
WHEN sa.age BETWEEN 35 AND 39 THEN '35 to 39'
ELSE '40_and_over'
END AS age_bucket
FROM student_ages sa
INNER JOIN data_mart.fact_continuation_scores fcs ON
sa.dim_person_id = fcs.dim_person_id
AND sa.institution_id = fcs.institution_id
INNER JOIN latest_continuation_score lcs ON
lcs.dim_person_id = fcs.dim_person_id
)
SELECT
ab.age_bucket,
COUNT(DISTINCT ab.institution_person_id) AS student_count,
AVG(ab.prediction) AS avg_prediction
FROM age_buckets ab
GROUP BY ab.age_bucket
ORDER BY
CASE ab.age_bucket
WHEN '18_and_under' THEN 1
WHEN '19' THEN 2
WHEN '20' THEN 3
WHEN '21' THEN 4
WHEN '22' THEN 5
WHEN '23' THEN 6
WHEN '24' THEN 7
WHEN '25 to 29' THEN 8
WHEN '30 to 34' THEN 9
WHEN '35 to 39' THEN 10
WHEN '40_and_over' THEN 11
END;

Student Persistence Prediction by Location

This query calculates the Student Persistence Prediction by Location. We will calculate the Persistence Prediction score by city, state, state_flag and zip_code

You can specify an State in which will generate a flag that will specify if the student is In State or Out of State

-- This state is the state that will indicate if the student is 'In State' or 'Out of State'.
WITH institution_state_variable AS (
SELECT 'MA' AS institution_state_value
)
, latest_continuation_score AS (
SELECT
dp.dim_person_id
, dp.institution_person_id
, fcs.civ_term_id
, dp.primary_state
, dp.primary_city
, dp.primary_zip
, fcs.event_ts
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
INNER JOIN data_mart.dim_person dp
ON fcs.dim_person_id = dp.dim_person_id
WHERE fcs.event_ts = (
SELECT MAX(fcs.event_ts)
FROM data_mart.fact_continuation_scores fcs
WHERE fcs.dim_person_id = fcs.dim_person_id
)
)
, continuation_score_by_location AS (
SELECT
lcs.institution_person_id
, lcs.primary_state
, lcs.primary_city
, lcs.primary_zip
, CASE
WHEN lcs.primary_state = (SELECT institution_state_value FROM institution_state_variable) THEN 'In State'
ELSE 'Out of State'
END AS state_flag
, AVG(lcs.prediction) AS student_persistence_prediction_score
, AVG(lcs.prediction) OVER (PARTITION BY lcs.primary_state) AS avg_persistence_prediction_by_state
, AVG(lcs.prediction) OVER (PARTITION BY state_flag) AS avg_persistence_prediction_by_in_state
, AVG(lcs.prediction) OVER (PARTITION BY lcs.primary_city) AS avg_persistence_prediction_by_city
, AVG(lcs.prediction) OVER (PARTITION BY lcs.primary_zip) AS avg_persistence_prediction_by_zip
, CASE
WHEN lcs.prediction >= 0.90 THEN 'Very High'
WHEN lcs.prediction >= 0.70 THEN 'High'
WHEN lcs.prediction >= 0.50 THEN 'Moderate'
WHEN lcs.prediction >= 0.20 THEN 'Low'
ELSE 'Very Low'
END AS student_completion_likelihood_category
FROM data_mart.fact_enroll fe
INNER JOIN data_mart.dim_person dp ON
dp.dim_person_id = fe.dim_person_id
INNER JOIN data_mart.dim_section ds ON
fe.dim_section_id = ds.dim_section_id
INNER JOIN data_mart.dim_career_term dct ON
dct.civ_term_id = ds.civ_term_id
INNER JOIN latest_continuation_score lcs ON
lcs.dim_person_id = dp.dim_person_id
AND lcs.civ_term_id = ds.civ_term_id
WHERE
CURRENT_DATE >= dp.raw_eff_ts
AND CURRENT_DATE < dp.raw_eff_ts_end
-- AND dct.is_active = 1 -- Students from active term only
GROUP BY
lcs.institution_person_id
, lcs.primary_state
, lcs.primary_city
, lcs.primary_zip
, lcs.prediction
)
SELECT
institution_person_id
, primary_state
, state_flag
, primary_city
, primary_zip
, student_persistence_prediction_score
, student_completion_likelihood_category
, avg_persistence_prediction_by_state
, avg_persistence_prediction_by_city
, avg_persistence_prediction_by_zip
FROM continuation_score_by_location
-- WHERE
-- institution_person_id = '<DESIRED_STUDENT_ID>'
-- AND primary_state = '<DESIRED_STATE>'
-- AND state_flag = 'In State'
-- AND primary_zip = '<DESIRED_ZIP>'
-- AND primary_city = '<DESIRED_CITY>'
-- AND student_persistence_prediction_score >= '<PERSISTENCE_PREDICTION>'

Besides that you can change the final select statement and get more details about the persistence prediction scores of students from a specific city, state, zip_code or in_state group.

-- Persistence prediction by state
SELECT
primary_state
, COUNT(*)
, MAX(avg_persistence_prediction_by_state) as avg_persistence_prediction_by_state
FROM continuation_score_by_location
WHERE primary_state = '<DESIRED_STATE>'
GROUP BY primary_state;
-- Persistence prediction by In State or Out of State flag
SELECT
state_flag
, COUNT(*)
, MAX(avg_persistence_prediction_by_in_state) as avg_persistence_prediction_by_in_state_flag
FROM continuation_score_by_location
GROUP BY state_flag;
-- Persistence prediction by zip code
SELECT
primary_zip
, COUNT(*)
, MAX(avg_persistence_prediction_by_zip) as avg_persistence_prediction_by_zip
FROM continuation_score_by_location
WHERE primary_zip = '<DESIRED_ZIP>'
GROUP BY primary_zip;

Student Persistence Score Shift

This query returns the latest persistence score for each student and career (academic level), and calculates the one day and one week shift in this score so that the customer can identify students whose score has changed recently. This query can be adapted to calculate score shifts other than one day or week.

The query also displays the Civitas defined bucket for the student's latest persistence score using the default ranges for these buckets: Very Low, Low, Moderate, High, Very High

-- Find the latest persistence score for each student and career.
WITH latest_continuation_score AS (
SELECT
fcs.dim_person_id
, fcs.civ_career
, fcs.civ_term_id
, fcs.event_ts
, fcs.prediction
FROM data_mart.fact_continuation_scores fcs
WHERE
-- This step ensures that the query returns the latest score for each student.
fcs.event_ts = (SELECT MAX(fcs1.event_ts)
FROM data_mart.fact_continuation_scores fcs1
WHERE fcs.dim_person_id = fcs1.dim_person_id
AND fcs.civ_career = fcs1.civ_career
AND fcs.civ_term_id = fcs1.civ_term_id)
-- This step ensures that the query returns scores for currently enrolled students only.
-- Only currently enrolled students will have a score on the latest day.
AND fcs.event_ts = (SELECT MAX(fcs2.event_ts)
FROM data_mart.fact_continuation_scores fcs2)
)
, continuation_score_shift AS (
-- Change the '7 days' or '1 day' thresholds below to use a different window for the score shift.
SELECT
lcs.dim_person_id
, lcs.civ_career
, lcs.civ_term_id
, lcs.event_ts
, lcs.prediction AS current_prediction
, fcs_shift_7.prediction AS previous_prediction_7_days
, fcs_shift_1.prediction AS previous_prediction_1_day
FROM latest_continuation_score lcs
LEFT JOIN data_mart.fact_continuation_scores fcs_shift_7 ON
lcs.dim_person_id = fcs_shift_7.dim_person_id
AND lcs.civ_career = fcs_shift_7.civ_career
AND lcs.civ_term_id = fcs_shift_7.civ_term_id
AND (lcs.event_ts - INTERVAL '7 days') = fcs_shift_7.event_ts
LEFT JOIN data_mart.fact_continuation_scores fcs_shift_1 ON
lcs.dim_person_id = fcs_shift_1.dim_person_id
AND lcs.civ_career = fcs_shift_1.civ_career
AND lcs.civ_term_id = fcs_shift_1.civ_term_id
AND (lcs.event_ts - INTERVAL '1 day') = fcs_shift_1.event_ts
)
SELECT
dp.institution_person_id
, css.civ_career AS civitas_career_id
, css.civ_term_id
, css.event_ts::DATE AS event_date
-- Note, these are the default ranges. They may have been adjusted for your institution.
, CASE
WHEN css.current_prediction >= 0.90 THEN 'Very High'
WHEN css.current_prediction >= 0.70 THEN 'High'
WHEN css.current_prediction >= 0.50 THEN 'Moderate'
WHEN css.current_prediction >= 0.20 THEN 'Low'
WHEN css.current_prediction < 0.20 THEN 'Very Low'
ELSE NULL
END AS persistence_likelihood -- Persistence Likelihood (Very High, High, Moderate, Low, Very Low)
, css.current_prediction AS persistence_prediction_score -- Persistence Prediction Score %
, css.current_prediction - css.previous_prediction_7_days AS weekly_prediction_score_shift -- Weekly Change in Persistence Score
, css.current_prediction - css.previous_prediction_1_day AS daily_prediction_score_shift --Daily Change in Persistence Score
FROM data_mart.dim_person dp
INNER JOIN continuation_score_shift css ON
dp.dim_person_id = css.dim_person_id
WHERE
CURRENT_DATE >= dp.raw_eff_ts
AND CURRENT_DATE < dp.raw_eff_ts_end

Student Persistence Risk Indicators

This query identifies students who exhibit multiple risk factors for academic failure or dropout, providing a comprehensive view of at-risk students with demographic breakdowns and specific intervention recommendations.

RISK FACTORS ANALYZED:

  1. Low Persistence Scores (< 50%) - Prediction model indicates low likelihood of continuation
  2. Poor Assignment Performance (< 60% average or >50% failing assignments)
  3. Reduced LMS Activity (< 75% of average student activity)
WITH active_term AS (
-- Get the current active term with data quality checks
SELECT
dct.civ_term_id
, dct.civ_career_term_start_dt
, dct.civ_career_term_end_dt
FROM data_mart.dim_career_term dct
WHERE dct.is_active = 1
LIMIT 1
)
, active_students AS (
-- Get currently enrolled students in active term
SELECT DISTINCT
dp.raw_person_id
, dp.institution_person_id
, dp.dim_person_id
, de.civ_career_id
, dct.civ_term_id
, dp.civ_first_to_attend_college
-- Individual race/ethnicity flags from schema
, dp.is_hispanic
, dp.is_american_indian
, dp.is_asian
, dp.is_white
, dp.is_black
, dp.is_pacific_islander
, dp.raw_gender
, dp.primary_state
FROM data_mart.dim_enrollment de
INNER JOIN data_mart.dim_section ds
ON de.raw_section_id = ds.raw_section_id
INNER JOIN data_mart.dim_career_term dct
ON ds.civ_term_id = dct.civ_term_id
AND de.raw_career_id = dct.raw_career_id
INNER JOIN data_mart.dim_person dp
ON de.raw_person_id = dp.raw_person_id
WHERE
dct.is_active = 1
AND de.raw_career_id <> 'Not Available'
--AND de.civ_career_id IN ('U', 'G') -- Limit to certain civ_career_id's as desired
AND de.civ_is_enrolled = 1
)
-- RISK INDICATOR 1: Low Persistence Scores
, latest_persistence_scores AS (
SELECT
acs.dim_person_id
, acs.institution_person_id
, fcs.prediction AS persistence_score
, ROW_NUMBER() OVER (
PARTITION BY acs.dim_person_id
ORDER BY fcs.event_ts DESC
) AS rn
FROM active_students acs
INNER JOIN data_mart.fact_continuation_scores fcs
ON acs.dim_person_id = fcs.dim_person_id
AND acs.civ_career_id = fcs.civ_career
AND acs.civ_term_id = fcs.civ_term_id
)
, low_persistence_students AS (
SELECT
lps.dim_person_id
, lps.institution_person_id
, lps.persistence_score
, 'Low Persistence' AS risk_factor
FROM latest_persistence_scores lps
WHERE lps.rn = 1
AND lps.persistence_score < 0.50
)
-- RISK INDICATOR 2: Poor Assignment Performance (using dim_person_id)
, assignment_performance AS (
SELECT
acs.dim_person_id
, acs.institution_person_id
, AVG(CASE
WHEN fagf.raw_student_points_possible > 0
THEN fagf.civ_assignment_grade / fagf.raw_student_points_possible
ELSE fagf.civ_assignment_grade / 100.0
END) AS avg_assignment_grade_pct
, COUNT(fagf.civ_assignment_grade) AS total_assignments
, COUNT(CASE
WHEN (CASE
WHEN fagf.raw_student_points_possible > 0
THEN fagf.civ_assignment_grade / fagf.raw_student_points_possible
ELSE fagf.civ_assignment_grade / 100.0
END) < 0.60
THEN 1
END) AS failing_assignments
FROM active_students acs
INNER JOIN data_mart.fact_assignment_grade_final fagf
ON acs.dim_person_id = fagf.dim_person_id
WHERE fagf.civ_assignment_grade IS NOT NULL
GROUP BY acs.dim_person_id, acs.institution_person_id
)
, poor_assignment_students AS (
SELECT
ap.dim_person_id
, ap.institution_person_id
, ap.avg_assignment_grade_pct
, ap.total_assignments
, ap.failing_assignments
, 'Poor Assignment Performance' AS risk_factor
FROM assignment_performance ap
WHERE
ap.total_assignments >= 2
AND (
ap.avg_assignment_grade_pct < 0.60
OR (ap.failing_assignments::FLOAT / ap.total_assignments) > 0.50
)
)
-- RISK INDICATOR 3: Reduced LMS Activity
, term_lms_activity AS (
-- Get all LMS activity for the active term
SELECT
fwa.dim_person_id
, COUNT(fwa.event_ts) AS activity_count
FROM data_mart.fact_web_activity fwa
INNER JOIN active_term at
ON fwa.event_ts BETWEEN at.civ_career_term_start_dt AND CURRENT_DATE
WHERE fwa.civ_activity_type_id IN ('assignment', 'gradebook', 'post')
GROUP BY fwa.dim_person_id
)
, lms_activity_stats AS (
-- Calculate activity statistics
SELECT
AVG(activity_count) AS avg_activity_per_student
, STDDEV(activity_count) AS stddev_activity
FROM term_lms_activity
)
, student_lms_activity AS (
-- Join students with their activity counts using dim_person_id
SELECT
acs.dim_person_id
, acs.institution_person_id
, COALESCE(tla.activity_count, 0) AS activity_count
, las.avg_activity_per_student
FROM active_students acs
CROSS JOIN lms_activity_stats las
LEFT JOIN term_lms_activity tla
ON acs.dim_person_id = tla.dim_person_id
)
, reduced_lms_activity_students AS (
SELECT
sla.dim_person_id
, sla.institution_person_id
, sla.activity_count
, sla.avg_activity_per_student
, 'Reduced LMS Activity' AS risk_factor
FROM student_lms_activity sla
WHERE sla.activity_count < (sla.avg_activity_per_student * 0.75)
)
-- COMBINE RISK INDICATORS
, student_risk_factors AS (
SELECT
acs.dim_person_id
, acs.institution_person_id
, acs.raw_person_id
, acs.civ_career_id
, acs.civ_term_id
, acs.civ_first_to_attend_college
-- Construct race/ethnicity from individual flags
, CASE
WHEN acs.is_hispanic = 1 THEN 'Hispanic/Latino'
WHEN acs.is_black = 1 THEN 'Black/African American'
WHEN acs.is_asian = 1 THEN 'Asian'
WHEN acs.is_white = 1 THEN 'White'
WHEN acs.is_american_indian = 1 THEN 'American Indian/Alaska Native'
WHEN acs.is_pacific_islander = 1 THEN 'Pacific Islander'
ELSE 'Unknown/Other'
END AS race_ethnicity
, acs.raw_gender
, acs.primary_state
, lps.persistence_score
, pas.avg_assignment_grade_pct
, pas.total_assignments
, pas.failing_assignments
, rlas.activity_count AS lms_activity_count
, rlas.avg_activity_per_student AS avg_lms_activity
-- Risk factor flags
, CASE WHEN lps.dim_person_id IS NOT NULL THEN 1 ELSE 0 END AS has_low_persistence
, CASE WHEN pas.dim_person_id IS NOT NULL THEN 1 ELSE 0 END AS has_poor_assignments
, CASE WHEN rlas.dim_person_id IS NOT NULL THEN 1 ELSE 0 END AS has_low_lms_activity
FROM active_students acs
LEFT JOIN low_persistence_students lps
ON acs.dim_person_id = lps.dim_person_id
LEFT JOIN poor_assignment_students pas
ON acs.dim_person_id = pas.dim_person_id
LEFT JOIN reduced_lms_activity_students rlas
ON acs.dim_person_id = rlas.dim_person_id
)
, at_risk_students AS (
SELECT
dim_person_id
, institution_person_id
, raw_person_id
, civ_career_id
, civ_term_id
, civ_first_to_attend_college
-- Construct race/ethnicity from individual flags
, race_ethnicity
, raw_gender
, primary_state
, persistence_score
, avg_assignment_grade_pct
, total_assignments
, failing_assignments
, lms_activity_count
, avg_lms_activity
, has_low_persistence
, has_poor_assignments
, has_low_lms_activity
-- Aggregate risk metrics
, (has_low_persistence + has_poor_assignments + has_low_lms_activity) AS total_risk_factors
, (has_low_persistence * 3 + has_poor_assignments * 2 + has_low_lms_activity * 1) AS priority_score
FROM student_risk_factors
WHERE (has_low_persistence + has_poor_assignments + has_low_lms_activity) >= 2
)
-- MAIN QUERY: Students with Multiple Warning Signs + Demographics
SELECT
ars.raw_person_id
, ars.institution_person_id
, ars.civ_career_id
-- Risk metrics
, ars.total_risk_factors
, ars.priority_score
-- Risk Factor Details
, CASE WHEN ars.has_low_persistence = 1
THEN TRIM(TO_CHAR(ars.persistence_score * 100, '990.00')) || '%'
ELSE 'N/A'
END AS persistence_score_percentage
, CASE WHEN ars.has_poor_assignments = 1
THEN TRIM(TO_CHAR(ars.avg_assignment_grade_pct * 100, '990.00')) || '%'
ELSE 'N/A'
END AS avg_assignment_grade_percentage
, CASE WHEN ars.has_low_lms_activity = 1
THEN ars.lms_activity_count::VARCHAR
ELSE 'N/A'
END AS lms_activity_count
-- Demographics
, CASE
WHEN ars.civ_first_to_attend_college = 1 THEN 'First Generation'
ELSE 'Non First Generation'
END AS first_gen_status
, COALESCE(ars.race_ethnicity, 'Unknown') AS race_ethnicity
, CASE
WHEN UPPER(ars.raw_gender) IN ('M', 'MALE') THEN 'Male'
WHEN UPPER(ars.raw_gender) IN ('F', 'FEMALE') THEN 'Female'
ELSE 'Other/Unknown'
END AS gender
, COALESCE(ars.primary_state, 'Unknown') AS home_state
-- Intervention Recommendations
, CASE
WHEN ars.total_risk_factors = 3 AND ars.has_low_persistence = 1
THEN 'URGENT: Academic coaching + counseling + LMS training'
WHEN ars.total_risk_factors = 3
THEN 'URGENT: Comprehensive intervention needed'
WHEN ars.has_low_persistence = 1 AND ars.has_poor_assignments = 1
THEN 'HIGH: Tutoring + study skills support'
WHEN ars.has_low_persistence = 1 AND ars.has_low_lms_activity = 1
THEN 'HIGH: Engagement intervention + technical support'
WHEN ars.has_poor_assignments = 1 AND ars.has_low_lms_activity = 1
THEN 'MODERATE: Learning support + engagement strategies'
ELSE 'NORMAL: Monitor closely + targeted support'
END AS intervention_recommendation
FROM at_risk_students ars
ORDER BY
ars.priority_score DESC
, ars.total_risk_factors DESC
, ars.dim_person_id ASC