MozillaSecurity · ksy36 · Feb 26, 2026 · Feb 4, 2026 · Feb 11, 2026 · Feb 12, 2026
diff --git a/CLUSTERING.md b/CLUSTERING.md
@@ -4,10 +4,9 @@
 
 The clustering mechanism groups similar reports within each domain using unsupervised machine learning (SBERT embeddings and agglomerative clustering) and creates a bucket for each cluster. 
 
-## One-time clustering of existing reports
+## Running Full Clustering
+Note that running full clustering **will delete existing clusters and cluster-based buckets** and recreate them from scratch. In general, this only needs to be done once.
 
-This command clusters reports by similarity and creates buckets for existing reports and intended to be run once. 
-Note that rerunning this command **will delete existing clusters and cluster-based buckets** and recreate them from scratch. 
 
 
 ```bash
@@ -48,6 +47,55 @@ The command performs the following steps:
 
 6. Clusters are saved to the database along with corresponding buckets. Each bucket receives a signature containing the domain and cluster ID for future report assignment.
 
+## Incremental Triage of New Reports
+
+After the initial full clustering, new incoming reports need to be assigned to appropriate buckets. This is handled by the `triage_new_reports` command, which runs every hour.
+
+### How it works
+
+1. **Match to existing clusters**: For each unbucketed report, the system:
+   - Generates a semantic embedding for the report text
+   - For each cluster in the report's domain:
+      - Compares the report to every member of that cluster
+      - Finds the N most similar members
+      - Calculates the average of those similarity scores
+   - Assigns the report to the cluster with the highest average similarity, if that average exceeds the domain's threshold.
+2. **Cluster unmatched reports**: Reports that don't match any existing cluster are clustered among themselves:
+   - Groups similar unmatched reports into new clusters
+   - Creates new cluster-based buckets for these groups
+
+3. **Domain-based fallback**: Reports that still don't cluster are assigned to default domain-based buckets.
+
+### Report Quality Criteria
+
+As with full clustering, reports are only considered for clustering if they have:
+- Non-empty comment text
+- An ML validity probability greater than 0.03 (i.e. not likely spam or invalid)
+
+Low-quality reports skip clustering and go directly to domain-based buckets.
+
+### Running Manually
+
+You can also run triage manually:
+```bash
+uv run -p 3.12 --extra=server server/manage.py triage_new_reports
+```
+
+Note: This command requires at least one successful full clustering run to have occurred first.
+
+## Results of clustering jobs in the UI
+
+Clustering results and job status can be viewed in the web interface at `/reportmanager/clustering/`, which shows:
+
+- Job history with status, completion time, and number of buckets created
+- Near-real-time progress updates (the page polls every 10 seconds)
+- Error messages if a job fails
+
+**Job Types**:
+- **Full**: Re-clusters all reports from scratch (deletes existing clusters)
+- **Incremental**: Automatically triages new unbucketed reports against existing clusters (runs hourly via Celery Beat)
+
+
 ## Clustering algorithm details
 
 ### Semantic Embeddings

diff --git a/server/frontend/src/api.js b/server/frontend/src/api.js
@@ -12,6 +12,10 @@ export const retrieveBucket = async (id) =>
 export const listBuckets = async (params) =>
   (await mainAxios.get("/reportmanager/rest/buckets/", { params })).data;
 
+export const listClusteringJobs = async (params) =>
+  (await mainAxios.get("/reportmanager/rest/clustering-jobs/", { params }))
+    .data;
+
 export const reportStats = async (params) =>
   (await mainAxios.get("/reportmanager/rest/reports/stats/", { params })).data;
 

diff --git a/server/frontend/src/components/Clustering.vue b/server/frontend/src/components/Clustering.vue
@@ -0,0 +1,112 @@
+<template>
+  <div class="panel panel-default">
+    <div class="panel-heading">
+      <i class="bi bi-diagram-3"></i> Report Clustering
+    </div>
+    <div class="panel-body">
+      <h4>History</h4>
+      <!--
+        Shown whenever the most recent fetch failed. Jobs loaded by an earlier
+        successful fetch stay visible below, so a transient polling error does
+        not blank the table.
+      -->
+      <div v-if="error" class="alert alert-danger" role="alert">
+        Failed to load clustering jobs: {{ error }}
+      </div>
+
+      <!--
+        The "no jobs" notice is only rendered when there is no fetch error, so
+        a failed request is never reported as an empty history.
+      -->
+      <div v-if="loading" class="text-center">
+        <i class="bi bi-hourglass-split"></i> Loading...
+      </div>
+
+      <div
+        v-else-if="jobs.length === 0 && !error"
+        class="alert alert-info"
+        role="alert"
+      >
+        No clustering jobs have been run yet.
+      </div>
+
+      <table v-else-if="jobs.length > 0" class="table table-striped">
+        <thead>
+          <tr>
+            <th>Type</th>
+            <th>Started At</th>
+            <th>Completed At</th>
+            <th>Status</th>
+            <th>Domain</th>
+            <th>Buckets created</th>
+            <th>Error</th>
+          </tr>
+        </thead>
+        <tbody>
+          <tr v-for="job in jobs" :key="job.id">
+            <td>
+              <span v-if="job.job_type === 'full'" class="label label-primary"
+                >Full</span
+              >
+              <span v-else class="label label-default">Incremental</span>
+            </td>
+            <td class="wrap-none">{{ formatDate(job.started_at) }}</td>
+            <td class="wrap-none">
+              <span v-if="job.completed_at">{{
+                formatDate(job.completed_at)
+              }}</span>
+              <span v-else class="label label-info">In Progress</span>
+            </td>
+            <td>
+              <!-- A job without a completion time is treated as still running. -->
+              <span v-if="!job.completed_at" class="label label-info"
+                >Running</span
+              >
+              <span v-else-if="job.is_ok" class="label label-success"
+                >Success</span
+              >
+              <span v-else class="label label-danger">Failed</span>
+            </td>
+            <td>{{ job.domain || "All domains" }}</td>
+            <td>{{ job.buckets_created }}</td>
+            <td>
+              <span v-if="job.error_message" class="text-danger">{{
+                job.error_message
+              }}</span>
+            </td>
+          </tr>
+        </tbody>
+      </table>
+    </div>
+  </div>
+</template>
+
+<script>
+import * as api from "../api.js";
+import { shorterDate } from "../helpers.js";
+
+// Delay between two refreshes of the job list, in milliseconds.
+const POLL_INTERVAL_MS = 10000;
+
+export default {
+  name: "Clustering",
+  data() {
+    return {
+      // Jobs from the last successful fetch, in the order the API returned them.
+      jobs: [],
+      // True only until the first fetch settles (successfully or not).
+      loading: true,
+      // Message of the most recent fetch failure, or null after a success.
+      error: null,
+      // Handle returned by setInterval, kept so the timer can be cancelled.
+      pollInterval: null,
+    };
+  },
+  mounted() {
+    // Load once immediately, then keep refreshing on a fixed interval.
+    this.fetchJobs();
+    this.pollInterval = setInterval(() => {
+      this.fetchJobs();
+    }, POLL_INTERVAL_MS);
+  },
+  beforeUnmount() {
+    // Stop polling when the component is torn down.
+    if (this.pollInterval) {
+      clearInterval(this.pollInterval);
+      this.pollInterval = null;
+    }
+  },
+  methods: {
+    /**
+     * Reload the job list, requesting it with `limit: 10`.
+     *
+     * On success the list is replaced and any earlier error is cleared. On
+     * failure the error is logged and exposed through `error`, while the
+     * previously loaded jobs are left in place.
+     *
+     * @returns {Promise<void>}
+     */
+    async fetchJobs() {
+      try {
+        const data = await api.listClusteringJobs({ limit: 10 });
+        // Fall back to an empty list when the payload has no `results` array,
+        // so the template's `jobs.length` checks never see undefined.
+        this.jobs = data && Array.isArray(data.results) ? data.results : [];
+        this.error = null;
+      } catch (err) {
+        console.error("Failed to fetch clustering jobs:", err);
+        this.error = err && err.message ? err.message : String(err);
+      } finally {
+        this.loading = false;
+      }
+    },
+    /**
+     * Format a job timestamp for display by delegating to `shorterDate`.
+     *
+     * @param {string} dateString - Timestamp as delivered by the API.
+     * @returns {*} Whatever `shorterDate` returns for that value.
+     */
+    formatDate(dateString) {
+      return shorterDate(dateString);
+    },
+  },
+};
+</script>
diff --git a/server/frontend/src/main.js b/server/frontend/src/main.js
@@ -16,6 +16,7 @@ import ProviderKey from "./components/ProviderKey.vue";
 import BucketView from "./components/Buckets/View.vue";
 import BucketList from "./components/Buckets/List.vue";
 import SpikesList from "./components/Spikes/List.vue";
+import Clustering from "./components/Clustering.vue";
 
 import "floating-vue/dist/style.css";
 
@@ -35,6 +36,7 @@ const app = createApp({
     bucketlist: BucketList,
     bucketview: BucketView,
     spikeslist: SpikesList,
+    clustering: Clustering,
   },
 });