diff --git a/.github/workflows/ml-pipeline.yml b/.github/workflows/ml-pipeline.yml
new file mode 100644
index 0000000..1dd17a8
--- /dev/null
+++ b/.github/workflows/ml-pipeline.yml
@@ -0,0 +1,101 @@
+# Runs the ML pipeline after the dashboard uploads a PDP cohort/AR file.
+# Triggered by a repository_dispatch event of type "ml-pipeline"; the event's
+# client_payload supplies `storage_key` and `file_type`.
+name: ML Pipeline
+
+on:
+  repository_dispatch:
+    types: [ml-pipeline]
+
+jobs:
+  run-pipeline:
+    name: Download data file and run ML pipeline
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.11
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      - name: Create virtualenv and install dependencies
+        run: |
+          python -m venv venv
+          venv/bin/pip install --upgrade pip
+          venv/bin/pip install -r requirements.txt
+
+      - name: Download uploaded file from Supabase Storage
+        env:
+          SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
+          SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
+          STORAGE_KEY: ${{ github.event.client_payload.storage_key }}
+          FILE_TYPE: ${{ github.event.client_payload.file_type }}
+        run: |
+          python - <<'EOF'
+          import json
+          import os
+          import urllib.parse
+          import urllib.request
+
+          url = os.environ["SUPABASE_URL"]
+          key = os.environ["SUPABASE_SERVICE_ROLE_KEY"]
+          storage_key = os.environ["STORAGE_KEY"]
+          file_type = os.environ["FILE_TYPE"]
+
+          # Determine destination path first so an unknown file_type fails
+          # before any network request is made.
+          # NOTE(review): pdp_ar uploads may be .xlsx but are saved under a .csv
+          # name here - confirm the pipeline handles that.
+          dest = {
+              "pdp_cohort": "data/bishop_state_cohorts_with_zip.csv",
+              "pdp_ar": "data/ar_bscc_with_zip.csv",
+          }.get(file_type)
+          if not dest:
+              raise ValueError(f"Unknown file_type: {file_type}")
+
+          # Get a signed download URL via Supabase Storage REST API.
+          # Percent-encode the key (keeping "/") so spaces, "#", "?" etc. in the
+          # uploaded file name cannot corrupt the request path.
+          quoted_key = urllib.parse.quote(storage_key, safe="/")
+          sign_url = f"{url}/storage/v1/object/sign/pdp-uploads/{quoted_key}"
+          req = urllib.request.Request(
+              sign_url,
+              data=json.dumps({"expiresIn": 600}).encode(),
+              headers={
+                  "Authorization": f"Bearer {key}",
+                  "Content-Type": "application/json",
+                  "apikey": key,
+              },
+              method="POST",
+          )
+          with urllib.request.urlopen(req) as resp:
+              signed = json.loads(resp.read())
+          signed_url = f"{url}/storage/v1{signed['signedURL']}"
+
+          # Create the target directory if it is missing.
+          os.makedirs(os.path.dirname(dest), exist_ok=True)
+          print(f"Downloading to {dest}...")
+          urllib.request.urlretrieve(signed_url, dest)
+          print("Download complete.")
+          EOF
+
+      - name: Run ML pipeline
+        env:
+          DB_HOST: ${{ secrets.DB_HOST }}
+          DB_USER: ${{ secrets.DB_USER }}
+          DB_PASSWORD: ${{ secrets.DB_PASSWORD }}
+          DB_PORT: ${{ secrets.DB_PORT }}
+          DB_NAME: ${{ secrets.DB_NAME }}
+          DB_SSL: ${{ secrets.DB_SSL }}
+        run: |
+          venv/bin/python ai_model/complete_ml_pipeline.py
+
+      - name: Upload ML pipeline report
+        uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: ml-pipeline-report-${{ github.run_id }}
+          path: ML_PIPELINE_REPORT.txt
+          retention-days: 90
diff --git a/codebenders-dashboard/app/admin/upload/page.tsx b/codebenders-dashboard/app/admin/upload/page.tsx
new file mode 100644
index 0000000..09a2116
--- /dev/null
+++ b/codebenders-dashboard/app/admin/upload/page.tsx
@@ -0,0 +1,322 @@
+"use client"
+
+import { useState, useCallback, useRef } from "react"
+import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
+import { Button } from "@/components/ui/button"
+import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
+import { Upload, AlertCircle, CheckCircle2, Loader2 } from "lucide-react"
+
+/** Upload kinds offered by this page; sent to the API as the `fileType` field. */
+type FileType = "course_enrollment" | "pdp_cohort" | "pdp_ar"
+/** UI phases: idle → previewing → preview → committing → result. */
+type Phase = "idle" | "previewing" | "preview" | "committing" | "result"
+
+/** JSON body returned by POST /api/admin/upload/preview. */
+interface PreviewData {
+  columns: string[]
+  sampleRows: Record<string, unknown>[]
+  rowCount: number
+  warnings: string[]
+}
+
+/** JSON body returned by POST /api/admin/upload/commit; fields vary by file type. */
+interface CommitResult {
+  // Course enrollment
+  inserted?: number
+  skipped?: number
+  errors?: string[]
+  // PDP/AR
+  status?: string
+  storageKey?: string
+  actionsUrl?: string
+  error?: string
+}
+
+/** Display label for each file type. */
+const FILE_TYPE_LABELS: Record<FileType, string> = {
+  course_enrollment: "Course Enrollment CSV",
+  pdp_cohort: "PDP Cohort CSV",
+  pdp_ar: "PDP AR File (.xlsx)",
+}
+
+/** Accepted extensions per file type (presumably the file input's `accept` value). */
+const FILE_TYPE_ACCEPT: Record<FileType, string> = {
+  course_enrollment: ".csv",
+  pdp_cohort: ".csv",
+  pdp_ar: ".csv,.xlsx",
+}
+
+/** Admin upload page: choose a file, preview the parsed rows, then commit it. */
+export default function UploadPage() {
+  const [fileType, setFileType] = useState<FileType>("course_enrollment")
+  const [file, setFile] = useState<File | null>(null)
+  const [phase, setPhase] = useState<Phase>("idle")
+  const [preview, setPreview] = useState<PreviewData | null>(null)
+  const [result, setResult] = useState<CommitResult | null>(null)
+  const [dragOver, setDragOver] = useState(false)
+  const [errorMsg, setErrorMsg] = useState<string | null>(null)
+  const fileInputRef = useRef<HTMLInputElement>(null)
+
+  // Store a newly chosen file and discard any earlier preview, result or error.
+  const handleFile = useCallback((f: File) => {
+    setFile(f)
+    setErrorMsg(null)
+    setPhase("idle")
+    setPreview(null)
+    setResult(null)
+    // Reset the native input (presumably so re-picking the same file still fires a change event).
+    if (fileInputRef.current) fileInputRef.current.value = ""
+  }, [])
+
+  // Drop-zone handler: take the first dropped file.
+  const handleDrop = useCallback((e: React.DragEvent) => {
+    e.preventDefault()
+    setDragOver(false)
+    const dropped = e.dataTransfer.files[0]
+    if (dropped) handleFile(dropped)
+  }, [handleFile])
+
+  // Send the file to the preview endpoint; on failure return to "idle" with a message.
+  const handlePreview = async () => {
+    if (!file) return
+    setPhase("previewing")
+    setErrorMsg(null)
+    const fd = new FormData()
+    fd.append("file", file)
+    fd.append("fileType", fileType)
+    try {
+      const res = await fetch("/api/admin/upload/preview", { method: "POST", body: fd })
+      const data = await res.json()
+      if (!res.ok) { setErrorMsg(data.error ?? "Preview failed"); setPhase("idle"); return }
+      setPreview(data as PreviewData)
+      setPhase("preview")
+    } catch (err) {
+      setErrorMsg(err instanceof Error ? err.message : "Network error")
+      setPhase("idle")
+    }
+  }
+
+  // Send the file to the commit endpoint; on failure stay on the preview with a message.
+  const handleCommit = async () => {
+    if (!file) return
+    setPhase("committing")
+    setErrorMsg(null)
+    const fd = new FormData()
+    fd.append("file", file)
+    fd.append("fileType", fileType)
+    try {
+      const res = await fetch("/api/admin/upload/commit", { method: "POST", body: fd })
+      const data = await res.json()
+      if (!res.ok) { setErrorMsg(data.error ?? "Upload failed"); setPhase("preview"); return }
+      setResult(data as CommitResult)
+      setPhase("result")
+    } catch (err) {
+      setErrorMsg(err instanceof Error ? err.message : "Network error")
+      setPhase("preview")
+    }
+  }
+
+  // Return to the initial state with no file selected.
+  const reset = () => {
+    setFile(null)
+    setPhase("idle")
+    setPreview(null)
+    setResult(null)
+    setErrorMsg(null)
+  }
+
+  return (
+
+

Upload Data

+

+ Import course enrollment CSVs or PDP/AR files. Admin and IR only. +

+
+ + {/* ── Phase: idle / selecting ── */} + {(phase === "idle" || phase === "previewing") && ( + + + Select File + Choose a file type, then drop or pick your file. + + + {/* File type selector */} +
+ {(Object.keys(FILE_TYPE_LABELS) as FileType[]).map(ft => ( + + ))} +
+ + {/* Drop zone */} + + + {errorMsg && ( +
+ + {errorMsg} +
+ )} + + +
+
+ )} + + {/* ── Phase: preview ── */} + {(phase === "preview" || phase === "committing") && preview && ( + + + Preview — {FILE_TYPE_LABELS[fileType]} + + {file?.name} · {preview.rowCount} rows parsed + + + + {preview.warnings.length > 0 && ( +
+ {preview.warnings.map((w, i) => ( +
+ + {w} +
+ ))} +
+ )} + +
+ + + + {preview.columns.slice(0, 8).map(col => ( + {col} + ))} + {preview.columns.length > 8 && +{preview.columns.length - 8} more} + + + + {preview.sampleRows.map((row, i) => ( + + {preview.columns.slice(0, 8).map(col => ( + {String(row[col] ?? "")} + ))} + {preview.columns.length > 8 && } + + ))} + +
+
+ + {errorMsg && ( +
+ + {errorMsg} +
+ )} + +
+ + +
+
+
+ )} + + {/* ── Phase: result ── */} + {phase === "result" && result && ( + + + + + Upload Complete + + + + {result.inserted !== undefined && ( +
+

{result.inserted.toLocaleString()} rows inserted

+ {(result.skipped ?? 0) > 0 &&

{result.skipped} rows skipped (missing Student_GUID)

} + {result.errors && result.errors.length > 0 && ( +
+ {result.errors.map((e, i) =>

{e}

)} +
+ )} +
+ )} + {result.status === "processing" && ( +
+

File saved to Supabase Storage. The ML pipeline has been queued in GitHub Actions.

+ {result.actionsUrl && ( + + View pipeline run on GitHub Actions → + + )} +
+ )} + {result.error && ( +
+ {result.error} +
+ )} + +
+
+ )} +
+  )
+}
diff --git a/codebenders-dashboard/app/api/admin/upload/commit/route.ts b/codebenders-dashboard/app/api/admin/upload/commit/route.ts
new file mode 100644
index 0000000..f36449b
--- /dev/null
+++ b/codebenders-dashboard/app/api/admin/upload/commit/route.ts
@@ -0,0 +1,280 @@
+import { type NextRequest, NextResponse } from "next/server"
+import { parse } from "csv-parse"
+import { Readable } from "stream"
+import { getPool } from "@/lib/db"
+import { createClient } from "@supabase/supabase-js"
+
+/** Number of rows sent per multi-row INSERT statement. */
+const BATCH_SIZE = 500
+
+/** Maps a "Y"/"N" cell to a boolean; any other value (including blank) is null. */
+function toBoolean(val: string): boolean | null {
+  // Trim first so padded cells behave the same as in toNumeric/toNullable.
+  const t = val.trim()
+  if (t === "Y") return true
+  if (t === "N") return false
+  return null
+}
+
+/** Parses a numeric cell; blank, "null"/"NULL" and non-numeric text become null. */
+function toNumeric(val: string): number | null {
+  const t = val.trim()
+  if (!t || t === "null" || t === "NULL") return null
+  // Number() rejects trailing garbage ("3abc") that parseFloat would read as 3,
+  // and isFinite also filters "Infinity", which isNaN lets through.
+  const n = Number(t)
+  return Number.isFinite(n) ? n : null
+}
+
+/** Trims a cell and turns the empty string into null. */
+function toNullable(val: string): string | null {
+  const t = val.trim()
+  return t === "" ? null : t
+}
+
+/** One row to insert into public.course_enrollments, keyed by column name. */
+interface EnrollmentRow {
+  student_guid: string
+  cohort: string | null
+  cohort_term: string | null
+  academic_year: string | null
+  academic_term: string | null
+  course_prefix: string | null
+  course_number: string | null
+  course_name: string | null
+  course_cip: string | null
+  course_type: string | null
+  gateway_type: string | null
+  is_co_requisite: boolean | null
+  is_core_course: boolean | null
+  core_course_type: string | null
+  delivery_method: string | null
+  grade: string | null
+  credits_attempted: number | null
+  credits_earned: number | null
+  instructor_status: string | null
+}
+
+/** Column order used for the INSERT; every entry is a key of EnrollmentRow. */
+const COLS = [
+  "student_guid", "cohort", "cohort_term", "academic_year", "academic_term",
+  "course_prefix", "course_number", "course_name", "course_cip", "course_type",
+  "gateway_type", "is_co_requisite", "is_core_course", "core_course_type",
+  "delivery_method", "grade", "credits_attempted", "credits_earned", "instructor_status",
+] as const
+
+/** Inserts `batch` with one parameterized multi-row INSERT; no-op when empty. */
+async function insertBatch(client: import("pg").PoolClient, batch: EnrollmentRow[]): Promise<void> {
+  if (batch.length === 0) return
+  const placeholders: string[] = []
+  const params: unknown[] = []
+  batch.forEach((row, ri) => {
+    const p = COLS.map((_, ci) => `$${ri * COLS.length + ci + 1}`).join(", ")
+    placeholders.push(`(${p})`)
+    COLS.forEach(col => params.push(row[col]))
+  })
+  await client.query(
+    `INSERT INTO public.course_enrollments (${COLS.join(", ")}) VALUES ${placeholders.join(", ")}`,
+    params
+  )
+}
+
+/**
+ * Replaces the contents of public.course_enrollments with the rows of a CSV.
+ *
+ * TRUNCATE and the batched INSERTs run in one transaction, so a failure rolls
+ * back and leaves the previous data in place. Rows without a Student_GUID are
+ * skipped. Failures are reported via `errors` (with zeroed counters), not thrown.
+ */
+async function processCourseEnrollment(buffer: Buffer): Promise<{ inserted: number; skipped: number; errors: string[] }> {
+  const pool = getPool()
+  const client = await pool.connect()
+  let inserted = 0
+  let skipped = 0
+  const errors: string[] = []
+
+  try {
+    await client.query("BEGIN")
+    await client.query("TRUNCATE TABLE public.course_enrollments RESTART IDENTITY")
+
+    // bom: true strips a UTF-8 byte-order mark so the first header still matches "Student_GUID".
+    const parser = Readable.from(buffer).pipe(
+      parse({ bom: true, columns: true, skip_empty_lines: true })
+    )
+
+    let batch: EnrollmentRow[] = []
+
+    for await (const record of parser) {
+      const r = record as Record<string, string>
+      const student_guid = toNullable(r["Student_GUID"] ?? "")
+      if (!student_guid) {
+        skipped++
+        continue
+      }
+      batch.push({
+        student_guid,
+        cohort: toNullable(r["Cohort"] ?? ""),
+        cohort_term: toNullable(r["Cohort_Term"] ?? ""),
+        academic_year: toNullable(r["Academic_Year"] ?? ""),
+        academic_term: toNullable(r["Academic_Term"] ?? ""),
+        course_prefix: toNullable(r["Course_Prefix"] ?? ""),
+        course_number: toNullable(r["Course_Number"] ?? ""),
+        course_name: toNullable(r["Course_Name"] ?? ""),
+        course_cip: toNullable(r["Course_CIP"] ?? ""),
+        course_type: toNullable(r["Course_Type"] ?? ""),
+        gateway_type: toNullable(r["Math_or_English_Gateway"] ?? ""),
+        is_co_requisite: toBoolean(r["Co_requisite_Course"] ?? ""),
+        is_core_course: toBoolean(r["Core_Course"] ?? ""),
+        core_course_type: toNullable(r["Core_Course_Type"] ?? ""),
+        delivery_method: toNullable(r["Delivery_Method"] ?? ""),
+        grade: toNullable(r["Grade"] ?? ""),
+        credits_attempted: toNumeric(r["Number_of_Credits_Attempted"] ?? ""),
+        credits_earned: toNumeric(r["Number_of_Credits_Earned"] ?? ""),
+        instructor_status: toNullable(r["Course_Instructor_Employment_Status"] ?? ""),
+      })
+      inserted++
+      if (batch.length >= BATCH_SIZE) {
+        await insertBatch(client, batch)
+        batch = []
+      }
+    }
+
+    if (batch.length > 0) await insertBatch(client, batch)
+
+    // A file with the wrong headers yields no usable rows; committing would
+    // leave the table truncated and empty, so abort and keep the old data.
+    if (inserted === 0) {
+      throw new Error("No rows with a Student_GUID were found; existing data was left unchanged")
+    }
+    await client.query("COMMIT")
+  } catch (err) {
+    // ROLLBACK can fail too (e.g. dropped connection); don't let it mask `err`.
+    try {
+      await client.query("ROLLBACK")
+    } catch {
+      // Ignored: the original error recorded below is the one worth reporting.
+    }
+    errors.push(err instanceof Error ? err.message : String(err))
+    inserted = 0
+    skipped = 0
+  } finally {
+    client.release()
+  }
+
+  return { inserted, skipped, errors }
+}
+
+/**
+ * Stores a PDP cohort/AR file in the "pdp-uploads" bucket, then queues the ML
+ * pipeline with a GitHub `repository_dispatch` event of type "ml-pipeline".
+ *
+ * @throws Error when a required env var is missing, the storage upload fails,
+ *   or GitHub rejects the dispatch request.
+ */
+async function processPdpFile(
+  buffer: Buffer,
+  fileName: string,
+  fileType: string,
+): Promise<{ status: string; storageKey: string; actionsUrl: string }> {
+  const supabaseUrl = process.env.NEXT_PUBLIC_SUPABASE_URL
+  const serviceKey = process.env.SUPABASE_SERVICE_ROLE_KEY
+  const githubPat = process.env.GITHUB_PAT
+  const githubRepo = process.env.GITHUB_REPO
+
+  if (!supabaseUrl || !serviceKey) throw new Error("Missing NEXT_PUBLIC_SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY")
+  if (!githubPat || !githubRepo) throw new Error("Missing GITHUB_PAT or GITHUB_REPO")
+
+  // 1. Upload to Supabase Storage
+  const supabase = createClient(supabaseUrl, serviceKey)
+  // The client-supplied name becomes part of an object key and, in the workflow,
+  // of a URL path, so reduce it to a conservative character set.
+  const safeName = fileName.replace(/[^A-Za-z0-9._-]+/g, "_") || "upload"
+  const storageKey = `${fileType}/${Date.now()}-${safeName}`
+  const { error: uploadError } = await supabase.storage
+    .from("pdp-uploads")
+    .upload(storageKey, buffer, { contentType: "application/octet-stream", upsert: false })
+
+  if (uploadError) throw new Error(`Storage upload failed: ${uploadError.message}`)
+
+  // 2. Trigger GitHub Actions via repository_dispatch
+  const dispatchRes = await fetch(
+    `https://api.github.com/repos/${githubRepo}/dispatches`,
+    {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${githubPat}`,
+        Accept: "application/vnd.github+json",
+        "X-GitHub-Api-Version": "2022-11-28",
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        event_type: "ml-pipeline",
+        client_payload: { storage_key: storageKey, file_type: fileType },
+      }),
+    }
+  )
+
+  if (!dispatchRes.ok) {
+    const body = await dispatchRes.text()
+    throw new Error(`GitHub dispatch failed (${dispatchRes.status}): ${body}`)
+  }
+
+  const actionsUrl = `https://github.com/${githubRepo}/actions`
+  return { status: "processing", storageKey, actionsUrl }
+}
+
+/**
+ * POST /api/admin/upload/commit — imports a course-enrollment CSV into the
+ * database, or stores a PDP cohort/AR file and queues the ML pipeline.
+ * Expects multipart form fields `file` and `fileType`.
+ */
+export async function POST(request: NextRequest) {
+  // NOTE(review): presumably x-user-role is set by trusted middleware — confirm
+  // that a client-supplied value cannot reach this handler.
+  const role = request.headers.get("x-user-role")
+  if (role !== "admin" && role !== "ir") {
+    return NextResponse.json({ error: "Forbidden" }, { status: 403 })
+  }
+
+  let formData: FormData
+  try {
+    formData = await request.formData()
+  } catch {
+    return NextResponse.json({ error: "Invalid multipart form data" }, { status: 400 })
+  }
+
+  // FormData entries are `string | File`; narrow them instead of asserting.
+  const fileEntry = formData.get("file")
+  const file = typeof fileEntry === "string" ? null : fileEntry
+  const rawFileType = formData.get("fileType")
+  const fileType = typeof rawFileType === "string" ? rawFileType.toLowerCase() : null
+
+  if (!file || !fileType) {
+    return NextResponse.json({ error: "Missing file or fileType" }, { status: 400 })
+  }
+
+  let buffer: Buffer
+  try {
+    buffer = Buffer.from(await file.arrayBuffer())
+  } catch {
+    return NextResponse.json({ error: "Failed to read uploaded file" }, { status: 400 })
+  }
+
+  // One guard for both branches: processCourseEnrollment can still throw before
+  // its own try block (e.g. when no database connection is available).
+  try {
+    if (fileType === "course_enrollment") {
+      return NextResponse.json(await processCourseEnrollment(buffer))
+    }
+    if (fileType === "pdp_cohort" || fileType === "pdp_ar") {
+      return NextResponse.json(await processPdpFile(buffer, file.name, fileType))
+    }
+  } catch (err) {
+    return NextResponse.json(
+      { error: err instanceof Error ? err.message : String(err) },
+      { status: 500 }
+    )
+  }
+
+  return NextResponse.json({ error: `Unknown fileType: ${fileType}` }, { status: 400 })
+}
diff --git a/codebenders-dashboard/app/api/admin/upload/preview/route.ts b/codebenders-dashboard/app/api/admin/upload/preview/route.ts
new file mode 100644
index 0000000..154cdb9
--- /dev/null
+++ b/codebenders-dashboard/app/api/admin/upload/preview/route.ts
@@ -0,0 +1,103 @@
+import { type NextRequest, NextResponse } from "next/server"
+import { parse } from "csv-parse/sync"
+import * as XLSX from "xlsx"
+
+/** Maximum number of data rows parsed for a preview. */
+const PREVIEW_ROW_LIMIT = 50
+
+/** Columns each file type must contain; a missing one produces a preview warning. */
+const REQUIRED_COLUMNS: Record<string, string[]> = {
+  course_enrollment: ["Student_GUID", "Course_Prefix", "Course_Number", "Academic_Year", "Academic_Term"],
+  pdp_cohort: ["Institution_ID", "Cohort", "Student_GUID", "Cohort_Term"],
+  pdp_ar: ["Institution_ID", "Cohort", "Student_GUID"],
+}
+
+/**
+ * POST /api/admin/upload/preview — parses the start of an uploaded CSV/XLSX file
+ * and returns its columns, up to 10 sample rows, and missing-column warnings.
+ * `rowCount` counts only the rows parsed for the preview (at most
+ * PREVIEW_ROW_LIMIT), not the rows in the whole file.
+ */
+export async function POST(request: NextRequest) {
+  // NOTE(review): presumably x-user-role is set by trusted middleware — confirm
+  // that a client-supplied value cannot reach this handler.
+  const role = request.headers.get("x-user-role")
+  if (role !== "admin" && role !== "ir") {
+    return NextResponse.json({ error: "Forbidden" }, { status: 403 })
+  }
+
+  let formData: FormData
+  try {
+    formData = await request.formData()
+  } catch {
+    return NextResponse.json({ error: "Invalid multipart form data" }, { status: 400 })
+  }
+
+  // FormData entries are `string | File`; narrow them instead of asserting.
+  const fileEntry = formData.get("file")
+  const file = typeof fileEntry === "string" ? null : fileEntry
+  const rawFileType = formData.get("fileType")
+  const fileType = typeof rawFileType === "string" ? rawFileType.toLowerCase() : null
+
+  if (!file || !fileType) {
+    return NextResponse.json({ error: "Missing file or fileType" }, { status: 400 })
+  }
+  // Own-property check: a plain lookup would accept inherited keys such as
+  // "constructor" and crash further down.
+  if (!Object.prototype.hasOwnProperty.call(REQUIRED_COLUMNS, fileType)) {
+    return NextResponse.json({ error: `Unknown fileType: ${fileType}` }, { status: 400 })
+  }
+
+  let rows: Record<string, unknown>[]
+
+  try {
+    const arrayBuf = await file.arrayBuffer()
+    const buffer = Buffer.from(arrayBuf)
+
+    // Match the extension case-insensitively so "DATA.XLSX" is not parsed as CSV.
+    if (file.name.toLowerCase().endsWith(".xlsx")) {
+      const wb = XLSX.read(buffer, { type: "buffer" })
+      const sheetName = wb.SheetNames[0]
+      const ws = sheetName ? wb.Sheets[sheetName] : undefined
+      if (!ws) throw new Error("Workbook contains no sheets")
+      // Cap the sheet to the header row plus PREVIEW_ROW_LIMIT data rows to match CSV behaviour
+      const fullRange = XLSX.utils.decode_range(ws["!ref"] ?? "A1")
+      fullRange.e.r = Math.min(fullRange.e.r, fullRange.s.r + PREVIEW_ROW_LIMIT)
+      ws["!ref"] = XLSX.utils.encode_range(fullRange)
+      rows = XLSX.utils.sheet_to_json<Record<string, unknown>>(ws, { defval: "" })
+    } else {
+      rows = parse(buffer, {
+        bom: true, // strip a UTF-8 byte-order mark so the first header name matches
+        columns: true,
+        skip_empty_lines: true,
+        to: PREVIEW_ROW_LIMIT,
+        cast: false,
+      }) as Record<string, string>[]
+    }
+  } catch (err) {
+    return NextResponse.json(
+      { error: "Failed to parse file", details: err instanceof Error ? err.message : String(err) },
+      { status: 400 }
+    )
+  }
+
+  if (rows.length === 0) {
+    return NextResponse.json({ error: "File is empty" }, { status: 400 })
+  }
+
+  const columns = Object.keys(rows[0])
+  const required = REQUIRED_COLUMNS[fileType]
+  const missing = required.filter(col => !columns.includes(col))
+
+  const warnings: string[] = []
+  if (missing.length > 0) {
+    warnings.push(`Missing required columns: ${missing.join(", ")}`)
+  }
+
+  return NextResponse.json({
+    columns,
+    sampleRows: rows.slice(0, 10),
+    rowCount: rows.length,
+    warnings,
+  })
+}
diff --git a/codebenders-dashboard/components/nav-header.tsx b/codebenders-dashboard/components/nav-header.tsx
index ad70255..d967d88 100644
--- a/codebenders-dashboard/components/nav-header.tsx
+++ b/codebenders-dashboard/components/nav-header.tsx
@@ -13,10 +13,11 @@ interface NavHeaderProps {
 }
 
 const NAV_LINKS = [
-  { href: "/", label: "Dashboard" },
-  { href: "/courses", label: "Courses" },
-  { href: "/students", label: "Students" },
-  { href: "/query", label: "Query" },
+  { href: "/", label: "Dashboard", roles: null },
+  { href: "/courses", label: "Courses", roles: null },
+  { href: "/students", label: "Students", roles: null },
+  { href: "/query", label: "Query", roles: null },
+  { href: "/admin/upload", label: "Upload Data", roles: ["admin", "ir"] as Role[] },
 ]
 
 export function NavHeader({ email, role }: NavHeaderProps) {
@@ -34,7 +35,7 @@ export function NavHeader({ email, role }: NavHeaderProps) {
           {/* Nav links */}