From 075b3f57240c58f752ebae6c8ba3ed0fc0d62efd Mon Sep 17 00:00:00 2001 From: William Hill Date: Tue, 24 Feb 2026 13:08:18 -0500 Subject: [PATCH 01/14] docs: design doc for self-service data upload (issue #86) --- .../2026-02-24-self-service-upload-design.md | 170 ++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 docs/plans/2026-02-24-self-service-upload-design.md diff --git a/docs/plans/2026-02-24-self-service-upload-design.md b/docs/plans/2026-02-24-self-service-upload-design.md new file mode 100644 index 0000000..9bf831b --- /dev/null +++ b/docs/plans/2026-02-24-self-service-upload-design.md @@ -0,0 +1,170 @@ +# Design: Self-Service Data Upload (Issue #86) + +**Date:** 2026-02-24 +**Author:** Claude Code + +--- + +## Overview + +Allow admin and IR users to upload institutional data files directly from the dashboard without +needing direct database or server access. Two upload paths: course enrollment CSVs (end-to-end +to Postgres) and PDP cohort/AR files (to Supabase Storage + GitHub Actions ML pipeline trigger). + +--- + +## Scope + +**In scope:** +- Course enrollment CSV → `course_enrollments` Postgres table (upsert) +- PDP Cohort CSV / PDP AR (.xlsx) → Supabase Storage + GitHub Actions `repository_dispatch` +- Preview step (first 10 rows + column validation) before commit +- Role guard: admin and ir only + +**Out of scope:** +- Upload history log (future issue) +- Column remapping UI (columns must match known schema) +- ML experiment tracking / MLflow (future issue) +- Auto-triggering ML pipeline without a server (GitHub Actions is the trigger mechanism) + +--- + +## Pages & Routing + +**New page:** `codebenders-dashboard/app/admin/upload/page.tsx` + +**Role guard:** Add to `lib/roles.ts` `ROUTE_PERMISSIONS`: +```ts +{ prefix: "/admin", roles: ["admin", "ir"] }, +{ prefix: "/api/admin", roles: ["admin", "ir"] }, +``` +Middleware already enforces this pattern via `x-user-role` header — no other auth code needed. 
+ +**Nav link:** Add "Upload Data" to `nav-header.tsx`, visible only to admin/ir roles. + +**New API routes:** +- `POST /api/admin/upload/preview` — parse first 10 rows, return sample + validation summary +- `POST /api/admin/upload/commit` — full ingest (course → Postgres; PDP/AR → Storage + Actions) + +--- + +## UI Flow (3 States) + +### State 1 — Select & Drop +- Dropdown: file type (`Course Enrollment CSV` | `PDP Cohort CSV` | `PDP AR File (.xlsx)`) +- Drag-and-drop zone (click to pick; `.csv` for course/cohort, `.csv`+`.xlsx` for AR) +- "Preview" button → calls `/api/admin/upload/preview` + +### State 2 — Preview +- Shows: detected file type, estimated row count, first 10 rows in a table +- Validation banner: lists missing required columns or warnings +- "Confirm & Upload" → calls `/api/admin/upload/commit` +- "Back" link to return to State 1 + +### State 3 — Result +- Course enrollments: `{ inserted, skipped, errors[] }` summary card +- PDP/AR: "File accepted — ML pipeline queued in GitHub Actions" + link to Actions run +- "Upload another file" resets to State 1 + +--- + +## API Routes + +### `POST /api/admin/upload/preview` + +**Input:** `multipart/form-data` with `file` and `fileType` fields + +**Logic:** +1. Parse first 50 rows with `csv-parse` (CSV) or `xlsx` (Excel) +2. Validate required columns exist for the given `fileType` +3. Return `{ columns, sampleRows (first 10), rowCount (estimated), warnings[] }` + +### `POST /api/admin/upload/commit` + +**Input:** Same multipart form + +**Course enrollment path:** +1. Stream-parse full CSV with `csv-parse` async iterator +2. Batch-upsert 500 rows at a time into `course_enrollments` via `pg` +3. Conflict target: `(student_guid, course_prefix, course_number, academic_term)` +4. Return `{ inserted, skipped, errors[] }` + +**PDP/AR path:** +1. Upload file to Supabase Storage bucket `pdp-uploads` via `@supabase/supabase-js` +2. 
Call GitHub API `POST /repos/{owner}/{repo}/dispatches` with: + ```json + { "event_type": "ml-pipeline", "client_payload": { "file_path": "<storage-path>" } } + ``` +3. Return `{ status: "processing", actionsUrl: "https://github.com/{owner}/{repo}/actions" }` + +**Role enforcement:** Read `x-user-role` header (set by middleware); return 403 if not admin/ir. + +--- + +## GitHub Actions Workflow + +**File:** `.github/workflows/ml-pipeline.yml` + +**Trigger:** `repository_dispatch` with `event_type: ml-pipeline` + +**Steps:** +1. Checkout repo +2. Set up Python with `venv` +3. Install dependencies (`pip install -r requirements.txt`) +4. Download uploaded file from Supabase Storage using `SUPABASE_SERVICE_ROLE_KEY` secret +5. Run `venv/bin/python ai_model/complete_ml_pipeline.py --input <downloaded-file>` +6. Upload `ML_PIPELINE_REPORT.txt` as a GitHub Actions artifact (retained 90 days) + +**Required secrets:** `SUPABASE_URL`, `SUPABASE_SERVICE_ROLE_KEY`, `GITHUB_TOKEN` (auto-provided) + +--- + +## Required Column Schemas + +### Course Enrollment CSV +Must include (CSV header names): `Student_GUID`, `Course_Prefix`, `Course_Number`, `Academic_Year`, `Academic_Term` +Optional (all other `course_enrollments` columns): filled as NULL if absent + +### PDP Cohort CSV +Must include: `Institution_ID`, `Cohort`, `Student_GUID`, `Cohort_Term` + +### PDP AR File (.xlsx) +Must include: `Institution_ID`, `Cohort`, `Student_GUID` (first sheet parsed) + +--- + +## New Packages + +| Package | Purpose | +|---------|---------| +| `csv-parse` | Streaming CSV parsing (async iterator mode) | +| `xlsx` | Excel (.xlsx) parsing | + +--- + +## New Files + +| File | Purpose | +|------|---------| +| `codebenders-dashboard/app/admin/upload/page.tsx` | Upload UI page | +| `codebenders-dashboard/app/api/admin/upload/preview/route.ts` | Preview API route | +| `codebenders-dashboard/app/api/admin/upload/commit/route.ts` | Commit API route | +| `.github/workflows/ml-pipeline.yml` | GitHub Actions ML pipeline trigger | + +--- + +## Supabase Changes + 
+**Storage bucket:** Create `pdp-uploads` bucket (private, authenticated access only). +No new database migrations required — `course_enrollments` table already exists. + +**Bucket policy:** Only service role key can read/write. Signed URLs used for pipeline download. + +--- + +## Constraints & Known Limitations + +- ML pipeline trigger via GitHub Actions means a ~30-60s delay before the pipeline starts +- Vercel free tier has a 4.5 MB request body limit — large files should use Supabase Storage direct upload in a future iteration +- No upload history log in this version (deferred) +- Column remapping is out of scope — files must match the known schema From 184202eed8002761efb8416e0a3e16c8b0508733 Mon Sep 17 00:00:00 2001 From: William Hill Date: Tue, 24 Feb 2026 13:12:15 -0500 Subject: [PATCH 02/14] docs: implementation plan for self-service data upload (issue #86) --- docs/plans/2026-02-24-self-service-upload.md | 1135 ++++++++++++++++++ 1 file changed, 1135 insertions(+) create mode 100644 docs/plans/2026-02-24-self-service-upload.md diff --git a/docs/plans/2026-02-24-self-service-upload.md b/docs/plans/2026-02-24-self-service-upload.md new file mode 100644 index 0000000..2c34769 --- /dev/null +++ b/docs/plans/2026-02-24-self-service-upload.md @@ -0,0 +1,1135 @@ +# Self-Service Data Upload Implementation Plan + +> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. + +**Goal:** Add a `/admin/upload` page (admin/ir only) for uploading course enrollment CSVs directly into Postgres, and PDP cohort/AR files into Supabase Storage with automatic GitHub Actions ML pipeline triggering. + +**Architecture:** Single unified upload page with a 3-state UI (select → preview → result). Two API routes: `/api/admin/upload/preview` (parse first 10 rows, validate columns) and `/api/admin/upload/commit` (course CSV → Postgres batch-upsert; PDP/AR → Supabase Storage + `repository_dispatch` to GitHub Actions). 
No new DB migrations needed — `course_enrollments` table already exists. + +**Tech Stack:** Next.js 16 App Router, `csv-parse` (streaming CSV), `xlsx` (Excel), `@supabase/supabase-js` (Storage), `pg` (Postgres upsert), GitHub REST API (`repository_dispatch`), TypeScript, Tailwind CSS, shadcn/ui + +--- + +## Task 1: Install `csv-parse` and `xlsx` packages + +**Files:** +- Modify: `codebenders-dashboard/package.json` (via npm install) + +**Step 1: Install packages** + +```bash +cd codebenders-dashboard && npm install csv-parse xlsx +``` + +**Step 2: Verify they appear in `package.json` dependencies** + +```bash +grep -E '"csv-parse"|"xlsx"' package.json +``` + +Expected output: +``` + "csv-parse": "^5.x.x", + "xlsx": "^0.x.x", +``` + +**Step 3: Commit** + +```bash +git add codebenders-dashboard/package.json codebenders-dashboard/package-lock.json +git commit -m "chore: add csv-parse and xlsx packages for file upload" +``` + +--- + +## Task 2: Add role permissions and nav link + +**Files:** +- Modify: `codebenders-dashboard/lib/roles.ts:6-13` +- Modify: `codebenders-dashboard/components/nav-header.tsx:15-20` + +**Step 1: Add `/admin` routes to `ROUTE_PERMISSIONS` in `lib/roles.ts`** + +Open `codebenders-dashboard/lib/roles.ts`. After line 13 (`{ prefix: "/api/query-history/export", ... 
}`), add two new entries so the array looks like: + +```ts +export const ROUTE_PERMISSIONS: Array<{ prefix: string; roles: Role[] }> = [ + { prefix: "/students", roles: ["admin", "advisor", "ir"] }, + { prefix: "/courses", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/query", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/api/students", roles: ["admin", "advisor", "ir"] }, + { prefix: "/api/courses", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/api/query-summary", roles: ["admin", "advisor", "ir", "faculty"] }, + { prefix: "/api/query-history/export", roles: ["admin", "ir"] }, + { prefix: "/admin", roles: ["admin", "ir"] }, + { prefix: "/api/admin", roles: ["admin", "ir"] }, +] +``` + +**Step 2: Add "Upload Data" nav link in `nav-header.tsx`** + +The `NavHeader` component already receives `role` as a prop. Replace the `NAV_LINKS` constant and its usage so the Upload link only renders for admin/ir: + +```tsx +const NAV_LINKS = [ + { href: "/", label: "Dashboard", roles: null }, + { href: "/courses", label: "Courses", roles: null }, + { href: "/students", label: "Students", roles: null }, + { href: "/query", label: "Query", roles: null }, + { href: "/admin/upload", label: "Upload Data", roles: ["admin", "ir"] as Role[] }, +] +``` + +Then update the `nav` block to filter on role: + +```tsx + +``` + +**Step 3: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. 
+ +**Step 4: Commit** + +```bash +git add codebenders-dashboard/lib/roles.ts codebenders-dashboard/components/nav-header.tsx +git commit -m "feat: add admin/ir role permissions and Upload Data nav link" +``` + +--- + +## Task 3: Add environment variables + +**Files:** +- Modify: `codebenders-dashboard/env.example` + +**Step 1: Add new env vars to `env.example`** + +Append to the bottom of `codebenders-dashboard/env.example`: + +```bash +# Supabase Storage (for PDP/AR file uploads — use the service role key, not anon) +# Find in Supabase → Project Settings → API → service_role key +SUPABASE_SERVICE_ROLE_KEY=your-service-role-key-here + +# GitHub Actions ML pipeline trigger +# Create a PAT at GitHub → Settings → Developer settings → Personal access tokens +# Required scope: repo (to trigger repository_dispatch) +GITHUB_PAT=ghp_your-personal-access-token-here +# Full repo path: owner/repo +GITHUB_REPO=devcolor/codebenders-datathon +``` + +**Step 2: Add the same vars to your local `.env.local`** + +Copy the three vars above into `codebenders-dashboard/.env.local` with real values. + +**Step 3: Commit** + +```bash +git add codebenders-dashboard/env.example +git commit -m "docs: add env vars for Supabase Storage and GitHub Actions pipeline trigger" +``` + +--- + +## Task 4: Create the preview API route + +**Files:** +- Create: `codebenders-dashboard/app/api/admin/upload/preview/route.ts` + +**Background:** This route accepts a `multipart/form-data` POST with two fields: +- `file` — the uploaded file (File object) +- `fileType` — one of `"course_enrollment"`, `"pdp_cohort"`, `"pdp_ar"` + +It parses the first 50 rows (or all rows if fewer), validates that required columns are present, and returns a preview payload. For `.xlsx` files, it reads the first sheet. For CSV, it uses `csv-parse`. 
+ +**Required columns per file type:** +- `course_enrollment`: `Student_GUID`, `Course_Prefix`, `Course_Number`, `Academic_Year`, `Academic_Term` +- `pdp_cohort`: `Institution_ID`, `Cohort`, `Student_GUID`, `Cohort_Term` +- `pdp_ar`: `Institution_ID`, `Cohort`, `Student_GUID` + +**Step 1: Create the route file** + +Create `codebenders-dashboard/app/api/admin/upload/preview/route.ts` with this content: + +```typescript +import { type NextRequest, NextResponse } from "next/server" +import { parse } from "csv-parse/sync" +import * as XLSX from "xlsx" + +const REQUIRED_COLUMNS: Record<string, string[]> = { + course_enrollment: ["Student_GUID", "Course_Prefix", "Course_Number", "Academic_Year", "Academic_Term"], + pdp_cohort: ["Institution_ID", "Cohort", "Student_GUID", "Cohort_Term"], + pdp_ar: ["Institution_ID", "Cohort", "Student_GUID"], +} + +export async function POST(request: NextRequest) { + const role = request.headers.get("x-user-role") + if (role !== "admin" && role !== "ir") { + return NextResponse.json({ error: "Forbidden" }, { status: 403 }) + } + + let formData: FormData + try { + formData = await request.formData() + } catch { + return NextResponse.json({ error: "Invalid multipart form data" }, { status: 400 }) + } + + const file = formData.get("file") as File | null + const fileType = formData.get("fileType") as string | null + + if (!file || !fileType) { + return NextResponse.json({ error: "Missing file or fileType" }, { status: 400 }) + } + if (!REQUIRED_COLUMNS[fileType]) { + return NextResponse.json({ error: `Unknown fileType: ${fileType}` }, { status: 400 }) + } + + let rows: Record<string, string>[] + + try { + const arrayBuf = await file.arrayBuffer() + const buffer = Buffer.from(arrayBuf) + + if (file.name.endsWith(".xlsx")) { + const wb = XLSX.read(buffer, { type: "buffer" }) + const ws = wb.Sheets[wb.SheetNames[0]] + rows = XLSX.utils.sheet_to_json<Record<string, string>>(ws, { defval: "" }) + } else { + rows = parse(buffer, { + columns: true, + skip_empty_lines: true, + to: 50, + cast: false, + }) 
as Record<string, string>[] + } + } catch (err) { + return NextResponse.json( + { error: "Failed to parse file", details: err instanceof Error ? err.message : String(err) }, + { status: 400 } + ) + } + + if (rows.length === 0) { + return NextResponse.json({ error: "File is empty" }, { status: 400 }) + } + + const columns = Object.keys(rows[0]) + const required = REQUIRED_COLUMNS[fileType] + const missing = required.filter(col => !columns.includes(col)) + + const warnings: string[] = [] + if (missing.length > 0) { + warnings.push(`Missing required columns: ${missing.join(", ")}`) + } + + return NextResponse.json({ + columns, + sampleRows: rows.slice(0, 10), + rowCount: rows.length, // count of parsed rows (CSV is capped at 50 by `to: 50`; .xlsx is not capped) + warnings, + }) +} +``` + +**Step 2: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. + +**Step 3: Smoke-test with curl** (while `npm run dev` is running) + +```bash +curl -s -X POST http://localhost:3000/api/admin/upload/preview \ + -H "x-user-role: admin" \ + -F "fileType=course_enrollment" \ + -F "file=@../data/bishop_state_courses.csv" | jq '{columns: .columns[:3], rowCount: .rowCount, warnings: .warnings}' +``` + +Expected: JSON with `columns` array, `rowCount: 50`, `warnings: []` + +**Step 4: Commit** + +```bash +git add codebenders-dashboard/app/api/admin/upload/preview/route.ts +git commit -m "feat: add POST /api/admin/upload/preview route" +``` + +--- + +## Task 5: Create the commit route — course enrollment path + +**Files:** +- Create: `codebenders-dashboard/app/api/admin/upload/commit/route.ts` + +**Background:** For `course_enrollment` file type, stream-parse the full CSV and batch-insert rows into `public.course_enrollments` in chunks of 500. Use `pg`'s `getPool()` (already available in `lib/db.ts`). The design doc's upsert conflict target `(student_guid, course_prefix, course_number, academic_term)` would require a unique-constraint migration, which is not part of this plan, so use a simpler strategy. 
+ +Actually, since the existing load script uses TRUNCATE (not upsert), and there's no unique index on `course_enrollments`, we'll use the same approach: truncate + re-insert. This is idempotent and matches the existing pattern. + +**Column mapping** from CSV header names → DB column names (matches the existing load script at `scripts/load-course-enrollments.ts`): + +| CSV header | DB column | +|---|---| +| Student_GUID | student_guid | +| Cohort | cohort | +| Cohort_Term | cohort_term | +| Academic_Year | academic_year | +| Academic_Term | academic_term | +| Course_Prefix | course_prefix | +| Course_Number | course_number | +| Course_Name | course_name | +| Course_CIP | course_cip | +| Course_Type | course_type | +| Math_or_English_Gateway | gateway_type | +| Co_requisite_Course | is_co_requisite (Y/N → boolean) | +| Core_Course | is_core_course (Y/N → boolean) | +| Core_Course_Type | core_course_type | +| Delivery_Method | delivery_method | +| Grade | grade | +| Number_of_Credits_Attempted | credits_attempted | +| Number_of_Credits_Earned | credits_earned | +| Course_Instructor_Employment_Status | instructor_status | + +**Step 1: Create the commit route file (course enrollment path only)** + +Create `codebenders-dashboard/app/api/admin/upload/commit/route.ts`: + +```typescript +import { type NextRequest, NextResponse } from "next/server" +import { parse } from "csv-parse" +import { Readable } from "stream" +import { getPool } from "@/lib/db" + +const BATCH_SIZE = 500 + +function toBoolean(val: string): boolean | null { + if (val === "Y") return true + if (val === "N") return false + return null +} + +function toNumeric(val: string): number | null { + const t = val.trim() + if (!t || t === "null" || t === "NULL") return null + const n = parseFloat(t) + return isNaN(n) ? null : n +} + +function toNullable(val: string): string | null { + const t = val.trim() + return t === "" ? 
null : t +} + +interface EnrollmentRow { + student_guid: string + cohort: string | null + cohort_term: string | null + academic_year: string | null + academic_term: string | null + course_prefix: string | null + course_number: string | null + course_name: string | null + course_cip: string | null + course_type: string | null + gateway_type: string | null + is_co_requisite: boolean | null + is_core_course: boolean | null + core_course_type: string | null + delivery_method: string | null + grade: string | null + credits_attempted: number | null + credits_earned: number | null + instructor_status: string | null +} + +const COLS = [ + "student_guid", "cohort", "cohort_term", "academic_year", "academic_term", + "course_prefix", "course_number", "course_name", "course_cip", "course_type", + "gateway_type", "is_co_requisite", "is_core_course", "core_course_type", + "delivery_method", "grade", "credits_attempted", "credits_earned", "instructor_status", +] as const + +async function insertBatch(client: import("pg").PoolClient, batch: EnrollmentRow[]): Promise<void> { + if (batch.length === 0) return + const placeholders: string[] = [] + const params: unknown[] = [] + batch.forEach((row, ri) => { + const p = COLS.map((_, ci) => `$${ri * COLS.length + ci + 1}`).join(", ") + placeholders.push(`(${p})`) + COLS.forEach(col => params.push(row[col])) + }) + await client.query( + `INSERT INTO public.course_enrollments (${COLS.join(", ")}) VALUES ${placeholders.join(", ")}`, + params + ) +} + +async function processCourseEnrollment(buffer: Buffer): Promise<{ inserted: number; skipped: number; errors: string[] }> { + const pool = getPool() + const client = await pool.connect() + let inserted = 0 + let skipped = 0 + const errors: string[] = [] + + try { + await client.query("BEGIN") + await client.query("TRUNCATE TABLE public.course_enrollments RESTART IDENTITY") + + const parser = Readable.from(buffer).pipe( + parse({ columns: true, skip_empty_lines: true }) + ) + + let batch: 
EnrollmentRow[] = [] + + for await (const record of parser) { + const r = record as Record<string, string> + const student_guid = toNullable(r["Student_GUID"] ?? "") + if (!student_guid) { + skipped++ + continue + } + batch.push({ + student_guid, + cohort: toNullable(r["Cohort"] ?? ""), + cohort_term: toNullable(r["Cohort_Term"] ?? ""), + academic_year: toNullable(r["Academic_Year"] ?? ""), + academic_term: toNullable(r["Academic_Term"] ?? ""), + course_prefix: toNullable(r["Course_Prefix"] ?? ""), + course_number: toNullable(r["Course_Number"] ?? ""), + course_name: toNullable(r["Course_Name"] ?? ""), + course_cip: toNullable(r["Course_CIP"] ?? ""), + course_type: toNullable(r["Course_Type"] ?? ""), + gateway_type: toNullable(r["Math_or_English_Gateway"] ?? ""), + is_co_requisite: toBoolean(r["Co_requisite_Course"] ?? ""), + is_core_course: toBoolean(r["Core_Course"] ?? ""), + core_course_type: toNullable(r["Core_Course_Type"] ?? ""), + delivery_method: toNullable(r["Delivery_Method"] ?? ""), + grade: toNullable(r["Grade"] ?? ""), + credits_attempted: toNumeric(r["Number_of_Credits_Attempted"] ?? ""), + credits_earned: toNumeric(r["Number_of_Credits_Earned"] ?? ""), + instructor_status: toNullable(r["Course_Instructor_Employment_Status"] ?? ""), + }) + inserted++ + if (batch.length >= BATCH_SIZE) { + await insertBatch(client, batch) + batch = [] + } + } + + if (batch.length > 0) await insertBatch(client, batch) + await client.query("COMMIT") + } catch (err) { + await client.query("ROLLBACK") + errors.push(err instanceof Error ? 
err.message : String(err)) + inserted = 0 + } finally { + client.release() + } + + return { inserted, skipped, errors } +} + +export async function POST(request: NextRequest) { + const role = request.headers.get("x-user-role") + if (role !== "admin" && role !== "ir") { + return NextResponse.json({ error: "Forbidden" }, { status: 403 }) + } + + let formData: FormData + try { + formData = await request.formData() + } catch { + return NextResponse.json({ error: "Invalid multipart form data" }, { status: 400 }) + } + + const file = formData.get("file") as File | null + const fileType = formData.get("fileType") as string | null + + if (!file || !fileType) { + return NextResponse.json({ error: "Missing file or fileType" }, { status: 400 }) + } + + const buffer = Buffer.from(await file.arrayBuffer()) + + if (fileType === "course_enrollment") { + const result = await processCourseEnrollment(buffer) + return NextResponse.json(result) + } + + // PDP/AR path — placeholder, implemented in Task 6 + return NextResponse.json({ error: `fileType "${fileType}" not yet implemented` }, { status: 501 }) +} +``` + +**Step 2: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. + +**Step 3: Smoke-test with curl** (while `npm run dev` is running) + +```bash +curl -s -X POST http://localhost:3000/api/admin/upload/commit \ + -H "x-user-role: admin" \ + -F "fileType=course_enrollment" \ + -F "file=@../data/bishop_state_courses.csv" | jq . 
+``` + +Expected: `{"inserted": <N>, "skipped": 0, "errors": []}` (where `<N>` is the number of data rows in the CSV) + +**Step 4: Commit** + +```bash +git add codebenders-dashboard/app/api/admin/upload/commit/route.ts +git commit -m "feat: add POST /api/admin/upload/commit — course enrollment upsert path" +``` + +--- + +## Task 6: Extend commit route — PDP/AR path (Supabase Storage + GitHub dispatch) + +**Files:** +- Modify: `codebenders-dashboard/app/api/admin/upload/commit/route.ts` + +**Background:** For `pdp_cohort` and `pdp_ar` file types, the commit route: +1. Creates a Supabase service-role client (uses `SUPABASE_SERVICE_ROLE_KEY`) +2. Uploads the file to the `pdp-uploads` Storage bucket with path `<fileType>/<timestamp>-<fileName>` +3. Calls the GitHub `repository_dispatch` API with `GITHUB_PAT` and `GITHUB_REPO` env vars +4. Returns `{ status: "processing", storageKey, actionsUrl }` + +**Before this task:** Create the `pdp-uploads` bucket in your Supabase dashboard: +- Supabase → Storage → New bucket → name: `pdp-uploads` → Private + +**Step 1: Add the PDP/AR handler to the commit route** + +In `codebenders-dashboard/app/api/admin/upload/commit/route.ts`, add these imports at the top: + +```typescript +import { createClient } from "@supabase/supabase-js" +``` + +Add this function before the `POST` handler: + +```typescript +async function processPdpFile( + buffer: Buffer, + fileName: string, + fileType: string, +): Promise<{ status: string; storageKey: string; actionsUrl: string }> { + const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL + const serviceKey = process.env.SUPABASE_SERVICE_ROLE_KEY + const githubPat = process.env.GITHUB_PAT + const githubRepo = process.env.GITHUB_REPO + + if (!supabaseUrl || !serviceKey) throw new Error("Missing SUPABASE_SERVICE_ROLE_KEY") + if (!githubPat || !githubRepo) throw new Error("Missing GITHUB_PAT or GITHUB_REPO") + + // 1. 
Upload to Supabase Storage + const supabase = createClient(supabaseUrl, serviceKey) + const storageKey = `${fileType}/${Date.now()}-${fileName}` + const { error: uploadError } = await supabase.storage + .from("pdp-uploads") + .upload(storageKey, buffer, { contentType: "application/octet-stream", upsert: false }) + + if (uploadError) throw new Error(`Storage upload failed: ${uploadError.message}`) + + // 2. Trigger GitHub Actions via repository_dispatch + const dispatchRes = await fetch( + `https://api.github.com/repos/${githubRepo}/dispatches`, + { + method: "POST", + headers: { + Authorization: `Bearer ${githubPat}`, + Accept: "application/vnd.github+json", + "X-GitHub-Api-Version": "2022-11-28", + "Content-Type": "application/json", + }, + body: JSON.stringify({ + event_type: "ml-pipeline", + client_payload: { storage_key: storageKey, file_type: fileType }, + }), + } + ) + + if (!dispatchRes.ok) { + const body = await dispatchRes.text() + throw new Error(`GitHub dispatch failed (${dispatchRes.status}): ${body}`) + } + + const actionsUrl = `https://github.com/${githubRepo}/actions` + return { status: "processing", storageKey, actionsUrl } +} +``` + +Replace the placeholder in the `POST` handler at the bottom: + +```typescript + if (fileType === "pdp_cohort" || fileType === "pdp_ar") { + try { + const result = await processPdpFile(buffer, file.name, fileType) + return NextResponse.json(result) + } catch (err) { + return NextResponse.json( + { error: err instanceof Error ? err.message : String(err) }, + { status: 500 } + ) + } + } + + return NextResponse.json({ error: `Unknown fileType: ${fileType}` }, { status: 400 }) +``` + +**Step 2: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. 
+ +**Step 3: Commit** + +```bash +git add codebenders-dashboard/app/api/admin/upload/commit/route.ts +git commit -m "feat: extend commit route with PDP/AR → Supabase Storage + GitHub Actions dispatch" +``` + +--- + +## Task 7: Create GitHub Actions ML pipeline workflow + +**Files:** +- Create: `.github/workflows/ml-pipeline.yml` + +**Background:** This workflow fires on `repository_dispatch` with `event_type: ml-pipeline`. It: +1. Downloads the uploaded file from Supabase Storage using a signed URL +2. Determines the target data file path from `file_type` in the payload +3. Replaces the appropriate file in `data/` with the uploaded one +4. Runs the Python ML pipeline +5. Uploads `ML_PIPELINE_REPORT.txt` as an artifact + +**Required GitHub Actions secrets** (set at repo level: Settings → Secrets → Actions): +- `SUPABASE_URL` — your Supabase project URL +- `SUPABASE_SERVICE_ROLE_KEY` — service role key for Storage access +- `DB_HOST`, `DB_USER`, `DB_PASSWORD`, `DB_PORT`, `DB_NAME`, `DB_SSL` — Postgres credentials + +**Step 1: Create the workflow file** + +Create `.github/workflows/ml-pipeline.yml`: + +```yaml +name: ML Pipeline + +on: + repository_dispatch: + types: [ml-pipeline] + +jobs: + run-pipeline: + name: Download data file and run ML pipeline + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Create virtualenv and install dependencies + run: | + python -m venv venv + venv/bin/pip install --upgrade pip + venv/bin/pip install -r requirements.txt + + - name: Download uploaded file from Supabase Storage + env: + SUPABASE_URL: ${{ secrets.SUPABASE_URL }} + SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }} + STORAGE_KEY: ${{ github.event.client_payload.storage_key }} + FILE_TYPE: ${{ github.event.client_payload.file_type }} + run: | + python - <<'EOF' + import os, urllib.request, json + + url = os.environ["SUPABASE_URL"] + key 
= os.environ["SUPABASE_SERVICE_ROLE_KEY"] + storage_key = os.environ["STORAGE_KEY"] + file_type = os.environ["FILE_TYPE"] + + # Get a signed download URL via Supabase Storage REST API + sign_url = f"{url}/storage/v1/object/sign/pdp-uploads/{storage_key}" + req = urllib.request.Request( + sign_url, + data=json.dumps({"expiresIn": 600}).encode(), + headers={ + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + "apikey": key, + }, + method="POST", + ) + with urllib.request.urlopen(req) as resp: + signed = json.loads(resp.read()) + signed_url = f"{url}/storage/v1{signed['signedURL']}" + + # Determine destination path + dest = { + "pdp_cohort": "data/bishop_state_cohorts_with_zip.csv", + "pdp_ar": "data/ar_bscc_with_zip.csv", + }.get(file_type) + if not dest: + raise ValueError(f"Unknown file_type: {file_type}") + + print(f"Downloading to {dest}...") + urllib.request.urlretrieve(signed_url, dest) + print("Download complete.") + EOF + + - name: Run ML pipeline + env: + DB_HOST: ${{ secrets.DB_HOST }} + DB_USER: ${{ secrets.DB_USER }} + DB_PASSWORD: ${{ secrets.DB_PASSWORD }} + DB_PORT: ${{ secrets.DB_PORT }} + DB_NAME: ${{ secrets.DB_NAME }} + DB_SSL: ${{ secrets.DB_SSL }} + run: | + venv/bin/python ai_model/complete_ml_pipeline.py + + - name: Upload ML pipeline report + uses: actions/upload-artifact@v4 + if: always() + with: + name: ml-pipeline-report-${{ github.run_id }} + path: ML_PIPELINE_REPORT.txt + retention-days: 90 +``` + +**Step 2: Commit** + +```bash +git add .github/workflows/ml-pipeline.yml +git commit -m "feat: add GitHub Actions ML pipeline workflow triggered by repository_dispatch" +``` + +--- + +## Task 8: Create the upload page UI + +**Files:** +- Create: `codebenders-dashboard/app/admin/upload/page.tsx` + +**Background:** This is a client component (`"use client"`) with three local state phases: `idle` (file selection), `preview` (showing sample rows + warnings), and `result` (showing outcome). 
It uses `fetch` to call the two API routes. Drag-and-drop is implemented with native HTML5 `onDrop` / `onDragOver` events. + +**Step 1: Create the page file** + +Create `codebenders-dashboard/app/admin/upload/page.tsx`: + +```tsx +"use client" + +import { useState, useCallback } from "react" +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card" +import { Button } from "@/components/ui/button" +import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" +import { Upload, AlertCircle, CheckCircle2, Loader2 } from "lucide-react" + +type FileType = "course_enrollment" | "pdp_cohort" | "pdp_ar" +type Phase = "idle" | "previewing" | "preview" | "committing" | "result" + +interface PreviewData { + columns: string[] + sampleRows: Record<string, string>[] + rowCount: number + warnings: string[] +} + +interface CommitResult { + // Course enrollment + inserted?: number + skipped?: number + errors?: string[] + // PDP/AR + status?: string + storageKey?: string + actionsUrl?: string + error?: string +} + +const FILE_TYPE_LABELS: Record<FileType, string> = { + course_enrollment: "Course Enrollment CSV", + pdp_cohort: "PDP Cohort CSV", + pdp_ar: "PDP AR File (.xlsx)", +} + +const FILE_TYPE_ACCEPT: Record<FileType, string> = { + course_enrollment: ".csv", + pdp_cohort: ".csv", + pdp_ar: ".csv,.xlsx", +} + +export default function UploadPage() { + const [fileType, setFileType] = useState<FileType>("course_enrollment") + const [file, setFile] = useState<File | null>(null) + const [phase, setPhase] = useState<Phase>("idle") + const [preview, setPreview] = useState<PreviewData | null>(null) + const [result, setResult] = useState<CommitResult | null>(null) + const [dragOver, setDragOver] = useState(false) + const [errorMsg, setErrorMsg] = useState<string | null>(null) + + const handleFile = useCallback((f: File) => { + setFile(f) + setErrorMsg(null) + setPhase("idle") + setPreview(null) + setResult(null) + }, []) + + const handleDrop = useCallback((e: React.DragEvent) => { + e.preventDefault() + setDragOver(false) + const dropped = 
e.dataTransfer.files[0] + if (dropped) handleFile(dropped) + }, [handleFile]) + + const handlePreview = async () => { + if (!file) return + setPhase("previewing") + setErrorMsg(null) + const fd = new FormData() + fd.append("file", file) + fd.append("fileType", fileType) + try { + const res = await fetch("/api/admin/upload/preview", { method: "POST", body: fd }) + const data = await res.json() + if (!res.ok) { setErrorMsg(data.error ?? "Preview failed"); setPhase("idle"); return } + setPreview(data as PreviewData) + setPhase("preview") + } catch (err) { + setErrorMsg(err instanceof Error ? err.message : "Network error") + setPhase("idle") + } + } + + const handleCommit = async () => { + if (!file) return + setPhase("committing") + setErrorMsg(null) + const fd = new FormData() + fd.append("file", file) + fd.append("fileType", fileType) + try { + const res = await fetch("/api/admin/upload/commit", { method: "POST", body: fd }) + const data = await res.json() + if (!res.ok) { setErrorMsg(data.error ?? "Upload failed"); setPhase("preview"); return } + setResult(data as CommitResult) + setPhase("result") + } catch (err) { + setErrorMsg(err instanceof Error ? err.message : "Network error") + setPhase("preview") + } + } + + const reset = () => { + setFile(null) + setPhase("idle") + setPreview(null) + setResult(null) + setErrorMsg(null) + } + + return ( +
+
+

Upload Data

+

+ Import course enrollment CSVs or PDP/AR files. Admin and IR only. +

+
+ + {/* ── Phase: idle / selecting ── */} + {(phase === "idle" || phase === "previewing") && ( + + + Select File + Choose a file type, then drop or pick your file. + + + {/* File type selector */} +
+ {(Object.keys(FILE_TYPE_LABELS) as FileType[]).map(ft => ( + + ))} +
+ + {/* Drop zone */} + + + {errorMsg && ( +
+ + {errorMsg} +
+ )} + + +
+
+ )} + + {/* ── Phase: preview ── */} + {(phase === "preview" || phase === "committing") && preview && ( + + + Preview — {FILE_TYPE_LABELS[fileType]} + + {file?.name} · {preview.rowCount} rows parsed + + + + {preview.warnings.length > 0 && ( +
+ {preview.warnings.map((w, i) => ( +
+ + {w} +
+ ))} +
+ )} + +
+ + + + {preview.columns.slice(0, 8).map(col => ( + {col} + ))} + {preview.columns.length > 8 && +{preview.columns.length - 8} more} + + + + {preview.sampleRows.map((row, i) => ( + + {preview.columns.slice(0, 8).map(col => ( + {String(row[col] ?? "")} + ))} + {preview.columns.length > 8 && } + + ))} + +
+
+ + {errorMsg && ( +
+ + {errorMsg} +
+ )} + +
+ + +
+
+
+ )} + + {/* ── Phase: result ── */} + {phase === "result" && result && ( + + + + + Upload Complete + + + + {result.inserted !== undefined && ( +
+

{result.inserted.toLocaleString()} rows inserted

+ {(result.skipped ?? 0) > 0 &&

{result.skipped} rows skipped (missing Student_GUID)

} + {result.errors && result.errors.length > 0 && ( +
+ {result.errors.map((e, i) =>

{e}

)} +
+ )} +
+ )} + {result.status === "processing" && ( +
+

File saved to Supabase Storage. The ML pipeline has been queued in GitHub Actions.

+ {result.actionsUrl && ( + + View pipeline run on GitHub Actions → + + )} +
+ )} + {result.error && ( +
+ {result.error} +
+ )} + +
+
+ )} +
+ ) +} +``` + +**Step 2: Type-check** + +```bash +cd codebenders-dashboard && npx tsc --noEmit +``` + +Expected: no errors. + +**Step 3: Visual check** (while `npm run dev` is running) + +- Log in as an admin or IR user +- Navigate to `/admin/upload` +- Verify "Upload Data" appears in the nav +- Try dragging and dropping `data/bishop_state_courses.csv` +- Verify the preview table shows first 10 rows +- Verify "Confirm & Upload" runs and returns a result + +**Step 4: Commit** + +```bash +git add codebenders-dashboard/app/admin/upload/page.tsx +git commit -m "feat: add /admin/upload page with drag-drop, preview, and commit UI" +``` + +--- + +## Task 9: Final type-check, lint, and push + +**Step 1: Full type-check + lint** + +```bash +cd codebenders-dashboard && npx tsc --noEmit && npm run lint +``` + +Expected: 0 errors, 0 warnings (or only pre-existing warnings). + +**Step 2: Push and open PR** + +```bash +git push -u origin HEAD +gh pr create \ + --title "feat: self-service data upload for course and PDP/AR files (#86)" \ + --body "Closes #86 + +## Summary +- \`/admin/upload\` page (admin/ir only) with drag-drop, preview, and commit +- Course enrollment CSVs stream-parsed and batch-upserted into \`course_enrollments\` Postgres table +- PDP cohort CSVs and AR .xlsx files uploaded to Supabase Storage \`pdp-uploads\` bucket +- GitHub Actions workflow \`ml-pipeline.yml\` triggered via \`repository_dispatch\` after PDP/AR upload + +## New env vars required (see env.example) +- \`SUPABASE_SERVICE_ROLE_KEY\` +- \`GITHUB_PAT\` +- \`GITHUB_REPO\` + +## GitHub Actions secrets required +- \`SUPABASE_URL\`, \`SUPABASE_SERVICE_ROLE_KEY\`, \`DB_HOST\`, \`DB_USER\`, \`DB_PASSWORD\`, \`DB_PORT\`, \`DB_NAME\`, \`DB_SSL\` + +## Test plan +- [ ] Admin/IR can access \`/admin/upload\`; other roles get redirected +- [ ] Upload Data nav link visible to admin/IR only +- [ ] Course enrollment CSV preview shows first 10 rows with correct columns +- [ ] Course enrollment commit inserts rows into 
\`course_enrollments\` table +- [ ] PDP cohort CSV commit uploads to Supabase Storage and returns \`status: processing\` +- [ ] \`npx tsc --noEmit\` passes with 0 errors +" +``` From f7e96f4cf53cbb3837b1aa9b71bf1ee70db525f3 Mon Sep 17 00:00:00 2001 From: William Hill Date: Tue, 24 Feb 2026 13:18:12 -0500 Subject: [PATCH 03/14] chore: add csv-parse and xlsx packages for file upload --- codebenders-dashboard/package.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/codebenders-dashboard/package.json b/codebenders-dashboard/package.json index c9fdbf5..f477274 100644 --- a/codebenders-dashboard/package.json +++ b/codebenders-dashboard/package.json @@ -18,6 +18,7 @@ "ai": "^5.0.81", "class-variance-authority": "^0.7.1", "clsx": "2.1.1", + "csv-parse": "^6.1.0", "lucide-react": "0.548.0", "next": "^16.1.6", "pg": "^8.18.0", @@ -27,6 +28,7 @@ "tailwind-merge": "3.3.1", "tailwindcss-animate": "1.0.7", "tw-animate-css": "1.4.0", + "xlsx": "^0.18.5", "zod": "^3.24.1" }, "devDependencies": { From f8475766f9d0f5214c927f42badeb09c0acd0cc0 Mon Sep 17 00:00:00 2001 From: William Hill Date: Tue, 24 Feb 2026 13:21:01 -0500 Subject: [PATCH 04/14] feat: add admin/ir role permissions and Upload Data nav link --- codebenders-dashboard/components/nav-header.tsx | 11 ++++++----- codebenders-dashboard/lib/roles.ts | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/codebenders-dashboard/components/nav-header.tsx b/codebenders-dashboard/components/nav-header.tsx index ad70255..d967d88 100644 --- a/codebenders-dashboard/components/nav-header.tsx +++ b/codebenders-dashboard/components/nav-header.tsx @@ -13,10 +13,11 @@ interface NavHeaderProps { } const NAV_LINKS = [ - { href: "/", label: "Dashboard" }, - { href: "/courses", label: "Courses" }, - { href: "/students", label: "Students" }, - { href: "/query", label: "Query" }, + { href: "/", label: "Dashboard", roles: null }, + { href: "/courses", label: "Courses", roles: null }, + { href: "/students", label: 
"Students", roles: null }, + { href: "/query", label: "Query", roles: null }, + { href: "/admin/upload", label: "Upload Data", roles: ["admin", "ir"] as Role[] }, ] export function NavHeader({ email, role }: NavHeaderProps) { @@ -34,7 +35,7 @@ export function NavHeader({ email, role }: NavHeaderProps) { {/* Nav links */}