From d3313a44a4302b60bcfd7db4824b8870d9218648 Mon Sep 17 00:00:00 2001
From: Elizabeth Campolongo <38985481+egrace479@users.noreply.github.com>
Date: Mon, 8 Jun 2026 11:24:07 -0400
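Subject: [PATCH 1/9] Fix URLs based on repo transfer

Point the README badge, docs, Colab, and citation links at the
Imageomics organization. Also drop the `license` field from the
BibTeX entry in README.md.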

---
 README.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 10576f5..9ac798f 100644
--- a/README.md
+++ b/README.md
@@ -2,16 +2,16 @@
 
 ![PyPI Downloads](https://static.pepy.tech/badge/saev)
 ![MIT License](https://img.shields.io/badge/License-MIT-efefef)
-![GitHub Repo stars](https://img.shields.io/github/stars/OSU-NLP-group/saev?style=flat&label=GitHub%20%E2%AD%90)
+![GitHub Repo stars](https://img.shields.io/github/stars/Imageomics/saev?style=flat&label=GitHub%20%E2%AD%90)
 
 Training sparse autoencoders (SAEs) on vision transformers (ViTs), implemented in PyTorch.
 
 ## Docs
 
-- [Docs](https://osu-nlp-group.github.io/saev/api)
-- [Colab Notebook for SAE Inference](https://colab.research.google.com/github/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb)
-- [User guide](https://osu-nlp-group.github.io/saev/api/users/guide)
-- [API reference](https://osu-nlp-group.github.io/saev/api/api/saev/)
+- [Docs](https://imageomics.github.io/saev/api)
+- [Colab Notebook for SAE Inference](https://colab.research.google.com/github/Imageomics/saev/blob/main/examples/inference.ipynb)
+- [User guide](https://imageomics.github.io/saev/api/users/guide)
+- [API reference](https://imageomics.github.io/saev/api/api/saev/)
 
 ## Research
 
@@ -29,10 +29,9 @@ If you want to cite the software, please cite it as:
 ```bib
 @software{stevens2025saev,
   author = {Stevens, Samuel},
-  license = {CC-BY-4.0},
   month = apr,
   title = {{saev}},
-  url = {https://github.com/OSU-NLP-Group/saev},
+  url = {https://github.com/Imageomics/saev},
   year = {2025}
 }
 ```

From 8e76166c2fcf511418a2d599695de6c217597381 Mon Sep 17 00:00:00 2001
From: egrace479 <e.campolongo479@gmail.com>
Date: Tue, 9 Jun 2026 10:45:56 -0400
Subject: [PATCH 2/9] Reset links to Imageomics GitHub

---
 CITATION.cff                                  |    4 +-
 contrib/trait_discovery/CONTRIBUTING.md       |    4 +-
 .../trait_discovery/scripts/push_dinov3.py    |    8 +-
 docs/api/404.html                             |    2 +-
 docs/api/api/colors/index.html                |    4 +-
 docs/api/api/configs/index.html               |    4 +-
 docs/api/api/data/bird_mae/index.html         |    4 +-
 docs/api/api/data/buffers/index.html          |    4 +-
 docs/api/api/data/clip/index.html             |    4 +-
 docs/api/api/data/datasets/index.html         |    4 +-
 docs/api/api/data/dinov2/index.html           |    4 +-
 docs/api/api/data/dinov3/index.html           |    4 +-
 docs/api/api/data/fake_clip/index.html        |    4 +-
 docs/api/api/data/indexed/index.html          |    4 +-
 docs/api/api/data/models/index.html           |    4 +-
 docs/api/api/data/ordered/index.html          |    4 +-
 docs/api/api/data/pe/index.html               |    4 +-
 docs/api/api/data/saev.data/index.html        |    4 +-
 docs/api/api/data/shards/index.html           |    4 +-
 docs/api/api/data/shuffled/index.html         |    4 +-
 docs/api/api/data/siglip/index.html           |    4 +-
 docs/api/api/data/transforms/index.html       |    4 +-
 docs/api/api/disk/index.html                  |    4 +-
 docs/api/api/framework/inference/index.html   |    4 +-
 .../api/framework/saev.framework/index.html   |    4 +-
 docs/api/api/framework/shards/index.html      |    4 +-
 docs/api/api/framework/train/index.html       |    4 +-
 docs/api/api/helpers/index.html               |    4 +-
 docs/api/api/metrics/index.html               |    4 +-
 docs/api/api/nn/modeling/index.html           |    4 +-
 docs/api/api/nn/objectives/index.html         |    4 +-
 docs/api/api/nn/saev.nn/index.html            |    4 +-
 docs/api/api/saev/index.html                  |    4 +-
 docs/api/api/summary/index.html               |    4 +-
 docs/api/api/utils/monitoring/index.html      |    4 +-
 docs/api/api/utils/saev.utils/index.html      |    4 +-
 docs/api/api/utils/scheduling/index.html      |    4 +-
 docs/api/api/utils/statistics/index.html      |    4 +-
 docs/api/api/utils/wandb/index.html           |    4 +-
 docs/api/api/viz/index.html                   |    4 +-
 docs/api/developers/contributing/index.html   |    4 +-
 docs/api/developers/datapoint-init/index.html |    4 +-
 docs/api/developers/disk-layout/index.html    |    4 +-
 docs/api/developers/naming/index.html         |    4 +-
 docs/api/developers/protocol/index.html       |    4 +-
 docs/api/developers/workflows/index.html      |    4 +-
 docs/api/index.html                           |    6 +-
 docs/api/search/search_index.json             |    2 +-
 docs/api/sitemap.xml                          |   98 +-
 docs/api/users/bird-mae-debugging/index.html  |    4 +-
 docs/api/users/glossary/index.html            |    4 +-
 docs/api/users/guide/index.html               |    4 +-
 docs/api/users/inference/index.html           |   12 +-
 docs/api/users/new-project/index.html         |    4 +-
 docs/api/users/sweeps/index.html              |    4 +-
 .../SAE_BioCLIP_24K_ViT-B-16_iNat21.md        |   10 +-
 .../modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md  |   10 +-
 .../SAE_DINOv2_24K_ViT-B-14_IN1K.md           |    8 +-
 docs/demos/classification/dist/app.js         |    2 +-
 docs/demos/semseg/dist/app.js                 |    2 +-
 docs/index.html                               |    6 +-
 .../archive/reports/2025-10-03/report.typ     |    2 +-
 docs/internal/handoff/main.typ                |    2 +-
 docs/mkdocs.yml                               |    4 +-
 docs/src/index.md                             |    2 +-
 docs/src/users/inference.md                   |    8 +-
 examples/inference.ipynb                      | 4052 ++++++++---------
 pyproject.toml                                |    4 +-
 scripts/export_notebook.py                    |    2 +-
 scripts/push_models.py                        |    4 +-
 src/web/apps/classification/dist/app.js       |    2 +-
 src/web/apps/semseg/dist/app.js               |    2 +-
 src/web/src/Classification.elm                |    2 +-
 src/web/src/Semseg.elm                        |    2 +-
 74 files changed, 2225 insertions(+), 2225 deletions(-)

diff --git a/CITATION.cff b/CITATION.cff
index 0d10c04..d09bd8c 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -13,8 +13,8 @@ authors:
     email: samuel.robert.stevens@gmail.com
     orcid: 'https://orcid.org/0009-0000-9493-7766'
     affiliation: The Ohio State University
-repository-code: 'https://github.com/OSU-NLP-Group/saev'
-url: 'https://osu-nlp-group.github.io/saev/'
+repository-code: 'https://github.com/Imageomics/saev'
+url: 'https://imageomics.github.io/saev/'
 repository-artifact: 'https://pypi.org/project/saev/'
 abstract: >-
   saev is a package for training sparse autoencoders (SAEs)
diff --git a/contrib/trait_discovery/CONTRIBUTING.md b/contrib/trait_discovery/CONTRIBUTING.md
index 307a003..c726770 100644
--- a/contrib/trait_discovery/CONTRIBUTING.md
+++ b/contrib/trait_discovery/CONTRIBUTING.md
@@ -8,7 +8,7 @@ This project aims to use sparse autoencoders (SAEs) on vision transformers like
 
 Sparse autoencoders were recently applied to interpreting large language models by many groups.
 [Anthropic's work](https://transformer-circuits.pub/2024/scaling-monosemanticity/index.html) is probably the most well known, but [OpenAI has some work](https://cdn.openai.com/papers/sparse-autoencoders.pdf) and [Google does too](https://arxiv.org/abs/2408.05147).
-I have some prior work ([website](https://osu-nlp-group.github.io/saev/), [arxiv](https://arxiv.org/abs/2502.06755))  that shows that sparse autoencoders can also be applied to vision transformer activations and nice-looking qualitative examples are discovered in ViT activations.
+I have some prior work ([website](https://imageomics.github.io/saev/), [arxiv](https://arxiv.org/abs/2502.06755))  that shows that sparse autoencoders can also be applied to vision transformer activations and nice-looking qualitative examples are discovered in ViT activations.
 
 **"Interesting and scientifically meaningful"**
 
@@ -75,7 +75,7 @@ train_baseline.py
 ## Environment
 
 ```sh
-git clone https://github.com/OSU-NLP-Group/saev
+git clone https://github.com/Imageomics/saev
 git checkout ring-buffer
 
 # Check that saev/ installed okay.
diff --git a/contrib/trait_discovery/scripts/push_dinov3.py b/contrib/trait_discovery/scripts/push_dinov3.py
index 332be14..8b24865 100644
--- a/contrib/trait_discovery/scripts/push_dinov3.py
+++ b/contrib/trait_discovery/scripts/push_dinov3.py
@@ -251,10 +251,10 @@ def make_readme(repo: Repo, staged: list[StagedRun]) -> str:
 
 # SAE for Meta's {repo.title} trained on ImageNet-1K Activations
 
-* **Homepage:** https://osu-nlp-group.github.io/saev
-* **Code:** https://github.com/OSU-NLP-Group/saev
+* **Homepage:** https://imageomics.github.io/saev
+* **Code:** https://github.com/Imageomics/saev
 * **Preprint:** https://arxiv.org/abs/2511.17735
-* **Demos:** https://osu-nlp-group.github.io/saev#demos
+* **Demos:** https://imageomics.github.io/saev#demos
 * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu)
 
 ## Checkpoints
@@ -280,7 +280,7 @@ def make_readme(repo: Repo, staged: list[StagedRun]) -> str:
 
 ## Inference Instructions
 
-Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions).
+Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions).
 """
 
 
diff --git a/docs/api/404.html b/docs/api/404.html
index 830762c..9264179 100644
--- a/docs/api/404.html
+++ b/docs/api/404.html
@@ -1910,7 +1910,7 @@ <h1>404 - Not found</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/colors/index.html b/docs/api/api/colors/index.html
index 9e4280a..b260136 100644
--- a/docs/api/api/colors/index.html
+++ b/docs/api/api/colors/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/colors/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/colors/">
       
       
         <link rel="prev" href="../saev/">
@@ -2111,7 +2111,7 @@ <h1>saev.colors</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/configs/index.html b/docs/api/api/configs/index.html
index dd51f57..c2295fb 100644
--- a/docs/api/api/configs/index.html
+++ b/docs/api/api/configs/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/configs/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/configs/">
       
       
         <link rel="prev" href="../colors/">
@@ -2717,7 +2717,7 @@ <h2 id="saev.configs.load_sweep" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/bird_mae/index.html b/docs/api/api/data/bird_mae/index.html
index d148d84..8845f07 100644
--- a/docs/api/api/data/bird_mae/index.html
+++ b/docs/api/api/data/bird_mae/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/bird_mae/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/bird_mae/">
       
       
         <link rel="prev" href="../saev.data/">
@@ -3146,7 +3146,7 @@ <h2 id="saev.data.bird_mae.transform" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/buffers/index.html b/docs/api/api/data/buffers/index.html
index a66da2f..e9b6440 100644
--- a/docs/api/api/data/buffers/index.html
+++ b/docs/api/api/data/buffers/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/buffers/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/buffers/">
       
       
         <link rel="prev" href="../bird_mae/">
@@ -2707,7 +2707,7 @@ <h3 id="saev.data.buffers.RingBuffer.qsize" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/clip/index.html b/docs/api/api/data/clip/index.html
index 745d0a9..5c11a8c 100644
--- a/docs/api/api/data/clip/index.html
+++ b/docs/api/api/data/clip/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/clip/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/clip/">
       
       
         <link rel="prev" href="../buffers/">
@@ -2329,7 +2329,7 @@ <h3 id="saev.data.clip.Vit.make_transforms" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/datasets/index.html b/docs/api/api/data/datasets/index.html
index 621c32a..395c0f1 100644
--- a/docs/api/api/data/datasets/index.html
+++ b/docs/api/api/data/datasets/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/datasets/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/datasets/">
       
       
         <link rel="prev" href="../clip/">
@@ -4384,7 +4384,7 @@ <h2 id="saev.data.datasets.is_img_seg_dataset" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/dinov2/index.html b/docs/api/api/data/dinov2/index.html
index c7ebe9d..5b0e21b 100644
--- a/docs/api/api/data/dinov2/index.html
+++ b/docs/api/api/data/dinov2/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/dinov2/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/dinov2/">
       
       
         <link rel="prev" href="../datasets/">
@@ -2109,7 +2109,7 @@ <h1>saev.data.dinov2</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/dinov3/index.html b/docs/api/api/data/dinov3/index.html
index f450c24..fa507ea 100644
--- a/docs/api/api/data/dinov3/index.html
+++ b/docs/api/api/data/dinov3/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/dinov3/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/dinov3/">
       
       
         <link rel="prev" href="../dinov2/">
@@ -3264,7 +3264,7 @@ <h3 id="saev.data.dinov3.Vit.make_transforms" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/fake_clip/index.html b/docs/api/api/data/fake_clip/index.html
index 0d97be7..6bf9328 100644
--- a/docs/api/api/data/fake_clip/index.html
+++ b/docs/api/api/data/fake_clip/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/fake_clip/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/fake_clip/">
       
       
         <link rel="prev" href="../dinov3/">
@@ -2366,7 +2366,7 @@ <h3 id="saev.data.fake_clip.Vit.make_transforms" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/indexed/index.html b/docs/api/api/data/indexed/index.html
index e3a0d89..6454396 100644
--- a/docs/api/api/data/indexed/index.html
+++ b/docs/api/api/data/indexed/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/indexed/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/indexed/">
       
       
         <link rel="prev" href="../fake_clip/">
@@ -2529,7 +2529,7 @@ <h3 id="saev.data.indexed.Dataset.__len__" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/models/index.html b/docs/api/api/data/models/index.html
index 7f24bdf..7de7986 100644
--- a/docs/api/api/data/models/index.html
+++ b/docs/api/api/data/models/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/models/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/models/">
       
       
         <link rel="prev" href="../indexed/">
@@ -2572,7 +2572,7 @@ <h2 id="saev.data.models.register_family" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/ordered/index.html b/docs/api/api/data/ordered/index.html
index 2103e14..43858a3 100644
--- a/docs/api/api/data/ordered/index.html
+++ b/docs/api/api/data/ordered/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/ordered/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/ordered/">
       
       
         <link rel="prev" href="../models/">
@@ -2634,7 +2634,7 @@ <h3 id="saev.data.ordered.DataLoader.__len__" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/pe/index.html b/docs/api/api/data/pe/index.html
index 77bb749..7c11b01 100644
--- a/docs/api/api/data/pe/index.html
+++ b/docs/api/api/data/pe/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/pe/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/pe/">
       
       
         <link rel="prev" href="../ordered/">
@@ -2286,7 +2286,7 @@ <h2 id="saev.data.pe.Spatial" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/saev.data/index.html b/docs/api/api/data/saev.data/index.html
index 22fbcea..ebba51e 100644
--- a/docs/api/api/data/saev.data/index.html
+++ b/docs/api/api/data/saev.data/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/saev.data/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/saev.data/">
       
       
         <link rel="prev" href="../../configs/">
@@ -4819,7 +4819,7 @@ <h2 id="saev.data.make_ordered_config" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/shards/index.html b/docs/api/api/data/shards/index.html
index 3786b80..fd9b62b 100644
--- a/docs/api/api/data/shards/index.html
+++ b/docs/api/api/data/shards/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/shards/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/shards/">
       
       
         <link rel="prev" href="../pe/">
@@ -5839,7 +5839,7 @@ <h2 id="saev.data.shards.worker_fn" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/shuffled/index.html b/docs/api/api/data/shuffled/index.html
index 344c051..940bd2f 100644
--- a/docs/api/api/data/shuffled/index.html
+++ b/docs/api/api/data/shuffled/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/shuffled/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/shuffled/">
       
       
         <link rel="prev" href="../shards/">
@@ -3021,7 +3021,7 @@ <h3 id="saev.data.shuffled.DataLoader.__len__" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/siglip/index.html b/docs/api/api/data/siglip/index.html
index 6ea32c7..314e6c2 100644
--- a/docs/api/api/data/siglip/index.html
+++ b/docs/api/api/data/siglip/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/siglip/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/siglip/">
       
       
         <link rel="prev" href="../shuffled/">
@@ -2352,7 +2352,7 @@ <h3 id="saev.data.siglip.Vit.make_transforms" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/data/transforms/index.html b/docs/api/api/data/transforms/index.html
index f77c3a0..6f2e738 100644
--- a/docs/api/api/data/transforms/index.html
+++ b/docs/api/api/data/transforms/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/data/transforms/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/data/transforms/">
       
       
         <link rel="prev" href="../siglip/">
@@ -2359,7 +2359,7 @@ <h2 id="saev.data.transforms.unfolded_conv2d" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/disk/index.html b/docs/api/api/disk/index.html
index 62dd5b6..0dfe4f7 100644
--- a/docs/api/api/disk/index.html
+++ b/docs/api/api/disk/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/disk/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/disk/">
       
       
         <link rel="prev" href="../data/transforms/">
@@ -3006,7 +3006,7 @@ <h2 id="saev.disk.is_shards_root" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/framework/inference/index.html b/docs/api/api/framework/inference/index.html
index 5e4487b..40d0505 100644
--- a/docs/api/api/framework/inference/index.html
+++ b/docs/api/api/framework/inference/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/framework/inference/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/framework/inference/">
       
       
         <link rel="prev" href="../saev.framework/">
@@ -2830,7 +2830,7 @@ <h2 id="saev.framework.inference.main" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/framework/saev.framework/index.html b/docs/api/api/framework/saev.framework/index.html
index a5f8f6d..c5abfdd 100644
--- a/docs/api/api/framework/saev.framework/index.html
+++ b/docs/api/api/framework/saev.framework/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/framework/saev.framework/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/framework/saev.framework/">
       
       
         <link rel="prev" href="../../disk/">
@@ -2113,7 +2113,7 @@ <h1>saev.framework</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/framework/shards/index.html b/docs/api/api/framework/shards/index.html
index 0f268ee..bc7f833 100644
--- a/docs/api/api/framework/shards/index.html
+++ b/docs/api/api/framework/shards/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/framework/shards/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/framework/shards/">
       
       
         <link rel="prev" href="../inference/">
@@ -2942,7 +2942,7 @@ <h2 id="saev.framework.shards.cli" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/framework/train/index.html b/docs/api/api/framework/train/index.html
index 0ab3c7d..6dfae60 100644
--- a/docs/api/api/framework/train/index.html
+++ b/docs/api/api/framework/train/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/framework/train/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/framework/train/">
       
       
         <link rel="prev" href="../shards/">
@@ -4631,7 +4631,7 @@ <h2 id="saev.framework.train.train" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/helpers/index.html b/docs/api/api/helpers/index.html
index c11c066..48ebf39 100644
--- a/docs/api/api/helpers/index.html
+++ b/docs/api/api/helpers/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/helpers/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/helpers/">
       
       
         <link rel="prev" href="../framework/train/">
@@ -3965,7 +3965,7 @@ <h2 id="saev.helpers.submit_job_array" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/metrics/index.html b/docs/api/api/metrics/index.html
index 9ec48cf..dfc1f68 100644
--- a/docs/api/api/metrics/index.html
+++ b/docs/api/api/metrics/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/metrics/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/metrics/">
       
       
         <link rel="prev" href="../helpers/">
@@ -2403,7 +2403,7 @@ <h3 id="saev.metrics.Metrics.from_accumulators" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/nn/modeling/index.html b/docs/api/api/nn/modeling/index.html
index 87eb072..7d9c4cd 100644
--- a/docs/api/api/nn/modeling/index.html
+++ b/docs/api/api/nn/modeling/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/nn/modeling/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/nn/modeling/">
       
       
         <link rel="prev" href="../saev.nn/">
@@ -4149,7 +4149,7 @@ <h2 id="saev.nn.modeling.load" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/nn/objectives/index.html b/docs/api/api/nn/objectives/index.html
index 8ef2117..de37e7f 100644
--- a/docs/api/api/nn/objectives/index.html
+++ b/docs/api/api/nn/objectives/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/nn/objectives/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/nn/objectives/">
       
       
         <link rel="prev" href="../modeling/">
@@ -2936,7 +2936,7 @@ <h2 id="saev.nn.objectives.sample_prefixes" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/nn/saev.nn/index.html b/docs/api/api/nn/saev.nn/index.html
index 517df73..2f9ab87 100644
--- a/docs/api/api/nn/saev.nn/index.html
+++ b/docs/api/api/nn/saev.nn/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/nn/saev.nn/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/nn/saev.nn/">
       
       
         <link rel="prev" href="../../metrics/">
@@ -3394,7 +3394,7 @@ <h2 id="saev.nn.load" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/saev/index.html b/docs/api/api/saev/index.html
index e7a4219..ff17eca 100644
--- a/docs/api/api/saev/index.html
+++ b/docs/api/api/saev/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/saev/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/saev/">
       
       
         <link rel="prev" href="../../developers/datapoint-init/">
@@ -2111,7 +2111,7 @@ <h1>saev</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/summary/index.html b/docs/api/api/summary/index.html
index 95e89a4..6efadd3 100644
--- a/docs/api/api/summary/index.html
+++ b/docs/api/api/summary/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/summary/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/summary/">
       
       
       
@@ -1966,7 +1966,7 @@ <h1>Summary</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/utils/monitoring/index.html b/docs/api/api/utils/monitoring/index.html
index 0b99bbf..7242ae6 100644
--- a/docs/api/api/utils/monitoring/index.html
+++ b/docs/api/api/utils/monitoring/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/utils/monitoring/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/utils/monitoring/">
       
       
         <link rel="prev" href="../saev.utils/">
@@ -2187,7 +2187,7 @@ <h2 id="saev.utils.monitoring.DataloaderMonitor" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/utils/saev.utils/index.html b/docs/api/api/utils/saev.utils/index.html
index 2448e21..9f29f0d 100644
--- a/docs/api/api/utils/saev.utils/index.html
+++ b/docs/api/api/utils/saev.utils/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/utils/saev.utils/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/utils/saev.utils/">
       
       
         <link rel="prev" href="../../nn/objectives/">
@@ -2109,7 +2109,7 @@ <h1>saev.utils</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/utils/scheduling/index.html b/docs/api/api/utils/scheduling/index.html
index d9259a9..2aeb2ea 100644
--- a/docs/api/api/utils/scheduling/index.html
+++ b/docs/api/api/utils/scheduling/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/utils/scheduling/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/utils/scheduling/">
       
       
         <link rel="prev" href="../monitoring/">
@@ -2388,7 +2388,7 @@ <h2 id="saev.utils.scheduling.WarmupCosine" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/utils/statistics/index.html b/docs/api/api/utils/statistics/index.html
index 34481f2..1682dad 100644
--- a/docs/api/api/utils/statistics/index.html
+++ b/docs/api/api/utils/statistics/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/utils/statistics/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/utils/statistics/">
       
       
         <link rel="prev" href="../scheduling/">
@@ -2407,7 +2407,7 @@ <h2 id="saev.utils.statistics.calc_batch_entropy" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/utils/wandb/index.html b/docs/api/api/utils/wandb/index.html
index e38e3cb..688b234 100644
--- a/docs/api/api/utils/wandb/index.html
+++ b/docs/api/api/utils/wandb/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/utils/wandb/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/utils/wandb/">
       
       
         <link rel="prev" href="../statistics/">
@@ -2210,7 +2210,7 @@ <h2 id="saev.utils.wandb.ParallelWandbRun" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/api/viz/index.html b/docs/api/api/viz/index.html
index 8be8358..8b90bb7 100644
--- a/docs/api/api/viz/index.html
+++ b/docs/api/api/viz/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/api/viz/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/api/viz/">
       
       
         <link rel="prev" href="../utils/wandb/">
@@ -2203,7 +2203,7 @@ <h2 id="saev.viz.load_palette" class="doc doc-heading">
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/developers/contributing/index.html b/docs/api/developers/contributing/index.html
index 8dce31d..c807bfa 100644
--- a/docs/api/developers/contributing/index.html
+++ b/docs/api/developers/contributing/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/developers/contributing/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/developers/contributing/">
       
       
         <link rel="prev" href="../../users/bird-mae-debugging/">
@@ -2085,7 +2085,7 @@ <h2 id="project-layout">Project layout<a class="headerlink" href="#project-layou
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/developers/datapoint-init/index.html b/docs/api/developers/datapoint-init/index.html
index 24180c3..07a2909 100644
--- a/docs/api/developers/datapoint-init/index.html
+++ b/docs/api/developers/datapoint-init/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/developers/datapoint-init/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/developers/datapoint-init/">
       
       
         <link rel="prev" href="../disk-layout/">
@@ -2042,7 +2042,7 @@ <h1 id="datapoint-initialization">Datapoint Initialization<a class="headerlink"
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/developers/disk-layout/index.html b/docs/api/developers/disk-layout/index.html
index e9a65e1..e916e9d 100644
--- a/docs/api/developers/disk-layout/index.html
+++ b/docs/api/developers/disk-layout/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/developers/disk-layout/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/developers/disk-layout/">
       
       
         <link rel="prev" href="../protocol/">
@@ -2153,7 +2153,7 @@ <h2 id="faqs">FAQs<a class="headerlink" href="#faqs" title="Permanent link">&par
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/developers/naming/index.html b/docs/api/developers/naming/index.html
index 611385c..9a53d19 100644
--- a/docs/api/developers/naming/index.html
+++ b/docs/api/developers/naming/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/developers/naming/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/developers/naming/">
       
       
       
@@ -1933,7 +1933,7 @@ <h1 id="variable-naming">Variable Naming<a class="headerlink" href="#variable-na
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/developers/protocol/index.html b/docs/api/developers/protocol/index.html
index 235e389..9762ef2 100644
--- a/docs/api/developers/protocol/index.html
+++ b/docs/api/developers/protocol/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/developers/protocol/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/developers/protocol/">
       
       
         <link rel="prev" href="../contributing/">
@@ -2389,7 +2389,7 @@ <h2 id="5-versioning-compatibility">5 Versioning &amp; compatibility<a class="he
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/developers/workflows/index.html b/docs/api/developers/workflows/index.html
index 8c6670c..fcbe01d 100644
--- a/docs/api/developers/workflows/index.html
+++ b/docs/api/developers/workflows/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/developers/workflows/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/developers/workflows/">
       
       
       
@@ -1932,7 +1932,7 @@ <h1>Workflows</h1>
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/index.html b/docs/api/index.html
index 92b58f9..1cf20ae 100644
--- a/docs/api/index.html
+++ b/docs/api/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/">
       
       
       
@@ -1963,7 +1963,7 @@
 <h1 id="saev">saev<a class="headerlink" href="#saev" title="Permanent link">&para;</a></h1>
 <p><img alt="PyPI Downloads" src="https://static.pepy.tech/badge/saev" />
 <img alt="MIT License" src="https://img.shields.io/badge/License-MIT-efefef" />
-<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/OSU-NLP-group/saev?style=flat&amp;label=GitHub%20%E2%AD%90" /></p>
+<img alt="GitHub Repo stars" src="https://img.shields.io/github/stars/Imageomics/saev?style=flat&amp;label=GitHub%20%E2%AD%90" /></p>
 <p>saev is a framework for training and evaluating <strong>S</strong>parse <strong>a</strong>uto<strong>e</strong>ncoders (SAEs) for <strong>v</strong>ision transformers (ViTs), implemented in PyTorch.</p>
 <h2 id="installation">Installation<a class="headerlink" href="#installation" title="Permanent link">&para;</a></h2>
 <p>Installation is supported with <a href="https://docs.astral.sh/uv/">uv</a>.
@@ -2074,7 +2074,7 @@ <h2 id="why-saev">Why saev?<a class="headerlink" href="#why-saev" title="Permane
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/search/search_index.json b/docs/api/search/search_index.json
index e81c5f4..4b95a4f 100644
--- a/docs/api/search/search_index.json
+++ b/docs/api/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"<p>saev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.</p>"},{"location":"#installation","title":"Installation","text":"<p>Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.</p> <p>Clone this repository, then from the root directory:</p> <pre><code>uv run scripts/launch.py --help\n</code></pre> <p>This will create a virtual environment and display the help for all the provided framework scripts.</p>"},{"location":"#quick-start","title":"Quick Start","text":"<p>Save some activations to disk:</p> <pre><code>uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n</code></pre> <p>Read the guide for details.</p>"},{"location":"#why-saev","title":"Why saev?","text":"<p>There are plenty of alternative libraries for SAEs:</p> <ul> <li>Overcomplete, primarily developed by Thomas Fel.</li> </ul> <p>However, saev has some benefits:</p> <ol> <li>saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. 
This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.</li> <li>saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.</li> <li>saev includes complete code from preprints in the <code>contrib/</code> directory, along with logbooks describing how the authors used and developed saev.</li> </ol>"},{"location":"api/colors/","title":"saev.colors","text":"<p>Utility color palettes used across saev visualizations.</p>"},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"<code>dict_to_dataclass(data, cls)</code>","text":"<p>Recursively convert a dictionary to a dataclass instance.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -&gt; T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle 
pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n</code></pre>"},{"location":"api/configs/#saev.configs.expand","title":"<code>expand(config)</code>","text":"<p>Expand a nested dict that may contain lists into many dicts.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef expand(config: dict[str, object]) -&gt; Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n</code></pre>"},{"location":"api/configs/#saev.configs.get_non_default_values","title":"<code>get_non_default_values(obj, default_obj)</code>","text":"<p>Recursively find fields that differ from defaults.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -&gt; dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == 
default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n</code></pre>"},{"location":"api/configs/#saev.configs.load_cfgs","title":"<code>load_cfgs(override, *, default, sweep_dcts)</code>","text":"<p>Load a list of configs from a combination of sources.</p> <p>Parameters:</p> Name Type Description Default <code>override</code> <code>T</code> <p>Command-line overridden values.</p> required <code>default</code> <code>T</code> <p>The default values for a config.</p> required <code>sweep_dcts</code> <code>list[dict]</code> <p>A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.</p> required <p>Returns:</p> Type Description <code>tuple[list[T], list[str]]</code> <p>A list of configs and a list of errors.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -&gt; tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. 
Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, errs\n</code></pre>"},{"location":"api/configs/#saev.configs.load_sweep","title":"<code>load_sweep(sweep_fpath)</code>","text":"<p>Load a sweep file and return the list of config dicts.</p> <p>Parameters:</p> Name Type Description Default <code>sweep_fpath</code> <code>Path</code> <p>Path to a Python file with a <code>make_cfgs()</code> function.</p> required <p>Returns:</p> Type 
Description <code>list[dict]</code> <p>List of config dictionaries from <code>make_cfgs()</code>. Returns empty list if any error occurs.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -&gt; list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n</code></pre>"},{"location":"api/disk/","title":"saev.disk","text":"<p>Helpers for sticking with the layout described in disk-layout.md.</p>"},{"location":"api/disk/#saev.disk.Run","title":"<code>Run(run_dir)</code>","text":"<p>Represents an SAE training run and some associated data.</p> <p>Parameters:</p> Name Type Description Default <code>run_dir</code> <code>Path</code> <p>Run directory, should be $SAEV_NFS/saev/runs/. Assumes the run already exists and validates the structure. Use <code>Run.new()</code> to create a new run. 
required Source code in <code>src/saev/disk.py</code> <pre><code>def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) &lt; 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n</code></pre>"},{"location":"api/disk/#saev.disk.Run.ckpt","title":"<code>ckpt</code>  <code>property</code>","text":"<p>Path to the sae.pt checkpoint.</p>"},{"location":"api/disk/#saev.disk.Run.config","title":"<code>config</code>  <code>property</code>","text":"<p>The training run config. 
Not a train.Config object because we don't want to import from train.py.</p>"},{"location":"api/disk/#saev.disk.Run.inference","title":"<code>inference</code>  <code>property</code>","text":"<p>Path to the inference/ directory.</p>"},{"location":"api/disk/#saev.disk.Run.run_id","title":"<code>run_id</code>  <code>property</code>","text":"<p>The run ID, created by wandb.</p>"},{"location":"api/disk/#saev.disk.Run.train_shards","title":"<code>train_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.val_shards","title":"<code>val_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.new","title":"<code>new(run_id, *, train_shards_dir, val_shards_dir, runs_root)</code>  <code>classmethod</code>","text":"<p>Create a new run with directory structure and symlinks.</p> <p>Parameters:</p> Name Type Description Default <code>run_id</code> <code>str</code> <p>The run ID (typically from wandb).</p> required <code>train_shards_dir</code> <code>Path</code> <p>Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/). required <code>val_shards_dir</code> <code>Path</code> <p>Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/). 
required <code>runs_root</code> <code>Path</code> <p>Root directory for runs (typically $SAEV_NFS/saev/runs).</p> required <p>Returns:</p> Type Description <code>Run</code> <p>A new Run instance with all directories and symlinks created.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -&gt; \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n</code></pre>"},{"location":"api/disk/#saev.disk.is_runs_root","title":"<code>is_runs_root(path)</code>","text":"<p>Check if <code>path</code> is a valid runs root directory.</p> <p>A valid runs root ends with <code>saev/runs</code> and exists as a directory.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/runs.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_runs_root(path: 
pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_dir","title":"<code>is_shards_dir(path)</code>","text":"<p>Check if <code>path</code> is a specific shards directory.</p> <p>A valid shards directory ends with <code>saev/shards/&lt;hash&gt;</code> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards/ with required files. Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/&lt;hash&gt;` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/&lt;hash&gt; with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) &lt; 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_root","title":"<code>is_shards_root(path)</code>","text":"<p>Check if <code>path</code> is a valid shards root directory.</p> <p>A valid shards root ends with <code>saev/shards</code> and exists as a directory.</p> <p>Parameters:</p> Name 
Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_root(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n</code></pre>"},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"<code>RemovedFeatureError</code>","text":"<p>               Bases: <code>RuntimeError</code></p> <p>Feature existed before but is no longer supported.</p>"},{"location":"api/helpers/#saev.helpers.batched_idx","title":"<code>batched_idx(total_size, batch_size)</code>","text":"<p>Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.</p> <p>Parameters:</p> Name Type Description Default <code>total_size</code> <code>int</code> <p>total number of examples</p> required <code>batch_size</code> <code>int</code> <p>maximum distance between the generated indices.</p> required <p>Returns:</p> Type Description <p>A generator of (int, int) tuples that can slice up a list or a tensor.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"<code>__iter__()</code>","text":"<p>Yield (start, end) index pairs for batching.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __iter__(self) -&gt; 
Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"<code>__len__()</code>","text":"<p>Return the number of batches.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.progress","title":"<code>progress(it, *, every=10, desc='progress', total=0)</code>","text":"<p>Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.</p> <p>Parameters:</p> Name Type Description Default <code>it</code> <code>Iterable</code> <p>Iterable to wrap.</p> required <code>every</code> <code>int</code> <p>How many iterations between logging progress.</p> <code>10</code> <code>desc</code> <code>str</code> <p>What to name the logger.</p> <code>'progress'</code> <code>total</code> <code>int</code> <p>If non-zero, how long the iterable is.</p> <code>0</code> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n</code></pre>"},{"location":"api/helpers/#saev.helpers.csr_topk","title":"<code>csr_topk(arr, *, k, axis=0, batch_size=1024)</code>","text":"<p>Takes the top k values of a sparse CSR array.</p> <p>We can only iterate efficiently over rows 
because it's a a CSR array.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>csr_array | csr_matrix</code> <p>The CSR array of values with shape (rows, cols).</p> required <code>k</code> <code>int</code> <p>The k in \"top-k\".</p> required <code>axis</code> <code>int</code> <p>The dimension to sort along.</p> <code>0</code> <code>batch_size</code> <code>int</code> <p>How many rows to process at once.</p> <code>1024</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>saev.helpers.NumpyTopK</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -&gt; NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n</code></pre>"},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"<code>current_git_commit()</code>","text":"<p>Best-effort short SHA of the repo containing this file.</p> <p>Returns <code>None</code> when * <code>git</code> executable is missing, * we\u2019re not inside a git repo (e.g. 
installed wheel), * or any git call errors out.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef current_git_commit() -&gt; str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n</code></pre>"},{"location":"api/helpers/#saev.helpers.flattened","title":"<code>flattened(dct, *, sep='.')</code>","text":"<p>Flatten a potentially nested dict to a single-level dict with <code>.</code>-separated keys.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -&gt; dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return 
new\n</code></pre>"},{"location":"api/helpers/#saev.helpers.fssafe","title":"<code>fssafe(s)</code>","text":"<p>Convert a string to be filesystem-safe by replacing special characters.</p> <p>This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.</p> <p>Parameters:</p> Name Type Description Default <code>s</code> <code>str</code> <p>String to make filesystem-safe.</p> required <p>Returns:</p> Type Description <code>str</code> <p>Filesystem-safe version of the string.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef fssafe(s: str) -&gt; str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"&lt;\": \"_\",\n        \"&gt;\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"<code>get_cache_dir()</code>","text":"<p>Get cache directory from environment variables, defaulting to the current working directory (.)</p> <p>Returns:</p> Type Description <code>str</code> <p>A path to a cache 
directory (might not exist yet).</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_cache_dir() -&gt; str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"<code>get_slurm_job_count()</code>","text":"<p>Get the current number of jobs in the queue for the current user.</p> <p>Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_job_count() -&gt; int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"<code>get_slurm_max_array_size()</code>","text":"<p>Get the MaxArraySize configuration from the current Slurm cluster.</p> <p>Returns:</p> Name Type Description <code>int</code> 
<code>int</code> <p>The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_array_size() -&gt; int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. 
Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"<code>get_slurm_max_submit_jobs()</code>","text":"<p>Get the MaxSubmitJobs limit from the current user's QOS.</p> <p>Returns:</p> Name Type Description <code>int</code> <code>int</code> <p>The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_submit_jobs() -&gt; int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n        
    logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.np_topk","title":"<code>np_topk(arr, k, axis=None)</code>","text":"<p>A numpy implementation of torch.topk.</p> <p>Returns the k largest elements along the given axis. If axis is None, the array is flattened first.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>ndarray</code> <p>Input array.</p> required <code>k</code> <code>int</code> <p>Number of top elements to return.</p> required <code>axis</code> <code>int | None</code> <p>Axis along which to find top k elements. If None, flattens array first.</p> <code>None</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>Array of k largest values along the specified axis, sorted in descending order.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -&gt; NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. 
If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis &lt; 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"<code>submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)</code>","text":"<p>Submit jobs in batches to respect Slurm's MaxArraySize limit.</p> <p>Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.</p> <p>Parameters:</p> Name Type Description Default <code>executor</code> <p>A submitit executor (SlurmExecutor or LocalExecutor).</p> required <code>fn</code> <code>Callable</code> <p>Worker function to call for each config.</p> required <code>args_list</code> <code>list</code> <p>List of arguments to pass to fn.</p> required <code>logger</code> <code>Logger | None</code> <p>Optional logger for progress messages.</p> <code>None</code> <code>margin</code> <code>float</code> <p>Fraction of MaxArraySize to use (default 0.8).</p> <code>0.8</code> <p>Yields:</p> Type Description <code>int</code> <p>Tuples of (global_index, result) for successful jobs.</p> <code>object</code> <p>For failed jobs, yields (global_index, None) and logs a warning.</p> Example <pre><code>executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n</code></pre> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -&gt; Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, 
None\n</code></pre>"},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"<code>Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements)</code>  <code>dataclass</code>","text":"<p>Validated reconstruction metrics aggregated over one evaluation corpus.</p> <p>The primary totals are <code>sse_recon</code> (SAE reconstruction SSE) and <code>sse_baseline</code> (mean-baseline SSE). Derived terms are: - <code>normalized_mse = sse_recon / sse_baseline</code> - <code>mse_per_dim = sse_recon / n_elements</code> - <code>mse_per_token = sse_recon / n_tokens</code> - <code>baseline_mse_per_dim = sse_baseline / n_elements</code> - <code>baseline_mse_per_token = sse_baseline / n_tokens</code></p> <p>Size terms are: - <code>n_tokens</code>: number of tokens included in aggregation - <code>d_model</code>: embedding width per token - <code>n_elements = n_tokens * d_model</code></p>"},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"<code>from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model)</code>  <code>classmethod</code>","text":"<p>Construct metrics from aggregate sums and shape information.</p> <p>Parameters:</p> Name Type Description Default <code>sse_recon</code> <code>float</code> <p>Sum of squared reconstruction errors over all selected tokens and dimensions.</p> required <code>sse_baseline</code> <code>float</code> <p>Sum of squared mean-baseline errors over the same tokens and dimensions.</p> required <code>n_tokens</code> <code>int</code> <p>Number of selected tokens in the aggregation set.</p> required <code>d_model</code> <code>int</code> <p>Activation dimension per token.</p> required <p>Returns:</p> Type Description <code>Metrics</code> <p>A validated <code>Metrics</code> object with all derived fields populated.</p> Source code in <code>src/saev/metrics.py</code> 
<pre><code>@classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -&gt; \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens &gt; 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model &gt; 0, msg\n    msg = f\"sse_recon must be &gt;= 0, got {sse_recon}.\"\n    assert sse_recon &gt;= 0.0, msg\n    msg = f\"sse_baseline must be &gt; 0, got {sse_baseline}.\"\n    assert sse_baseline &gt; 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n</code></pre>"},{"location":"api/saev/","title":"saev","text":"<p>saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.</p>"},{"location":"api/summary/","title":"Summary","text":"<ul> <li>saev</li> <li>saev.colors</li> <li>saev.configs</li> <li>saev.data</li> <li>saev.data.bird_mae</li> <li>saev.data.buffers</li> <li>saev.data.clip</li> <li>saev.data.datasets</li> <li>saev.data.dinov2</li> <li>saev.data.dinov3</li> 
<li>saev.data.fake_clip</li> <li>saev.data.indexed</li> <li>saev.data.models</li> <li>saev.data.ordered</li> <li>saev.data.pe</li> <li>saev.data.shards</li> <li>saev.data.shuffled</li> <li>saev.data.siglip</li> <li>saev.data.transforms</li> <li>saev.disk</li> <li>saev.framework</li> <li>saev.framework.inference</li> <li>saev.framework.shards</li> <li>saev.framework.train</li> <li>saev.helpers</li> <li>saev.metrics</li> <li>saev.nn</li> <li>saev.nn.modeling</li> <li>saev.nn.objectives</li> <li>saev.utils</li> <li>saev.utils.monitoring</li> <li>saev.utils.scheduling</li> <li>saev.utils.statistics</li> <li>saev.utils.wandb</li> <li>saev.viz</li> </ul>"},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"<code>load_palette(path)</code>","text":"<p>TODO: docstring.</p> Source code in <code>src/saev/viz.py</code> <pre><code>@beartype.beartype\ndef load_palette(path: pathlib.Path) -&gt; list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                
continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 &lt;= chan &lt;= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n</code></pre>"},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"<code>Encoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Pure PyTorch Bird-MAE backbone (no HF).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, cfg: Config) -&gt; None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. 
Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"<code>PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Image (time x mel) to patch embeddings.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -&gt; None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    
)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"<code>Transformer(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"<code>filter_audio(waveform, sample_rate, patches, *, mode='time')</code>","text":"<p>Filter audio based on SAE patch activations over the log-mel spectrogram.</p> <p>Given a waveform and the SAE 
activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.</p> <p>Parameters:</p> Name Type Description Default <code>waveform</code> <code>Float[Tensor, ' samples']</code> <p>Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.</p> required <code>sample_rate</code> <code>int</code> <p>Audio sample rate in Hz. Should be 32000 for Bird-MAE.</p> required <code>patches</code> <code>Bool[Tensor, ' content_tokens_per_example']</code> <p>Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.</p> required <code>mode</code> <code>Literal['time', 'time+freq']</code> <p>Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.</p> <code>'time'</code> <p>Returns:</p> Type Description <code>Float[Tensor, ' clipped']</code> <p>Filtered audio waveform as a 1D torch tensor.</p> Example <p>waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np)  # [512, 128] waveform = torch.from_numpy(waveform_np)</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -&gt; Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. 
Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        &gt;&gt;&gt; waveform_np, sr = librosa.load(audio_path, sr=32000)\n        &gt;&gt;&gt; mel = bird_mae.transform(waveform_np)  # [512, 128]\n        &gt;&gt;&gt; waveform = torch.from_numpy(waveform_np)\n        &gt;&gt;&gt; # ... run through SAE to get patch_activations [256] ...\n        &gt;&gt;&gt; # ... covert SAE activations to bool with &gt; 0 ...\n        &gt;&gt;&gt; time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        &gt;&gt;&gt; time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() &lt; max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n        
    waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs &gt;= hz_low) &amp; (freqs &lt; hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start &lt; valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n        
    win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start &gt;= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... 
covert SAE activations to bool with &gt; 0 ...","text":"<p>time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")</p>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"<code>transform(waveform)</code>","text":"<p>waveform: 1D tensor [samples] returns: 2D tensor [512, 128] matching HF's feature extractor output</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -&gt; Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples &lt; max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t &lt; BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t &gt; BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, 
BIRDMAE_N_MELS), fb.shape\n\n    return fb\n</code></pre>"},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"<code>ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)</code>","text":"<p>Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -&gt; (x, meta). Random order, each sample delivered once, blocking semantics.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n  
  try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"<code>RingBuffer(slots, shape, dtype)</code>","text":"<p>Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.</p>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"<p>slots  : int           capacity in number of items (tensor rows) shape  : tuple[int]    shape of one item, e.g. 
(batch, dim) dtype  : torch.dtype   tensor dtype</p> <p>put(tensor)  : blocks if full get() -&gt; tensor  : blocks if empty qsize() -&gt; int        advisory size (approximate) close()               frees shared storage (call in the main process)</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots &gt; 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -&gt; Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for 
tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"<code>get()</code>","text":"<p>Return a view of the next item; blocks if the queue is empty.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def get(self) -&gt; torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"<code>put(tensor)</code>","text":"<p>Copy <code>tensor</code> into the next free slot; blocks if the queue is full.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def put(self, tensor: torch.Tensor) -&gt; None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 &lt;&lt; 
64)\n</code></pre>"},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Get patch size for CLIP models.</p>"},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. 
import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"<code>BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).</p> <p>See https://www.kaggle.com/competitions/birdclef-2025/data for more information.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of bird audio samples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"<code>root = pathlib.Path('data/birdclef-2025')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory containing the BirdCLEF 2025 data.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"<code>split = 'train_audio'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which data split to use.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"<code>BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').</p> Source code in 
<code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -&gt; target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        
tp.assert_never(cfg.split)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"<code>n_classes</code>  <code>property</code>","text":"<p>Number of bird species.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"<code>Cifar10(name='uoft-cs/cifar10', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace CIFAR-10.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"<code>name = 'uoft-cs/cifar10'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"<code>root</code>  <code>property</code>","text":"<p>Dummy path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. 
Can be 'train' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"<code>DatasetConfig</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Abstract base class for dataset configurations.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"<code>n_examples</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Number of examples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"<code>root</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"<code>FakeImg(n_examples=10)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"<code>FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Tiny synthetic segmentation dataset for tests.</p> <p>Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which class index is considered background.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"<code>content_tokens_per_example = 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per 
example.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"<code>n_classes = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of segmentation classes.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"<code>n_examples = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of examples.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"<code>FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Synthetic segmentation dataset providing pixel-level segmentation masks.</p> <p>Mimics ImgSegFolderDataset by providing:</p> <ul> <li>image: a dummy RGB PIL image</li> <li>segmentation: a PIL image with pixel-level class labels</li> <li>index, target, label</li> </ul> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"<code>Imagenet(name='ILSVRC/imagenet-1k', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace Imagenet.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. 
Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"<code>name = 'ILSVRC/imagenet-1k'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this..</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"<code>ImgFolder(root=pathlib.Path('./data/split'))</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. 
If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"<code>root = pathlib.Path('./data/split')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored. Can be a glob pattern to match multiple directories.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"<code>ImgFolderDataset(*args, sample_transform=None, **kwargs)</code>","text":"<p>               Bases: <code>ImageFolder</code></p> <p>A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"<code>__getitem__(index)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>index</code> <code>int</code> <p>Index</p> required <p>Returns:</p> Type Description <code>dict[str, object]</code> <p>dict with keys 'data', 'index', 'target' and 'label'.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __getitem__(self, index: int) -&gt; dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if 
self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"<code>ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Background label.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"<code>labels_csv = 'labels.csv'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>CSV file with columns: stem,label1,label2,... First column must be 'stem'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. 
Calculated on the fly by counting image files in root/images/split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"<code>root = pathlib.Path('./data/segdataset')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"<code>split = 'training'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"<code>get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>Gets the dataset for the current experiment; delegates construction to dataset-specific functions.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Config</code> <p>Config for the dataset.</p> required <code>data_tr</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> required <code>mask_tr</code> <p>Transform to be applied to masks.</p> required <code>dict_tr</code> <p>Transform to be applied to the entire sample dict.</p> required <p>Returns:     A dataset that has dictionaries with <code>'data'</code>, <code>'index'</code>, <code>'target'</code>, and <code>'label'</code> keys containing examples.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied 
to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"<code>is_img_seg_dataset(data_cfg)</code>","text":"<p>Check if a dataset 
configuration is for an image segmentation dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data_cfg</code> <code>DatasetConfig</code> <p>Dataset configuration</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if this is an image segmentation dataset that should have labels.bin</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -&gt; bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n</code></pre>"},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"<code>Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None)</code>  <code>dataclass</code>","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"<code>depth = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of transformer blocks.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"<code>device = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Device for tensor operations.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"<code>embed_dim = 768</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Embedding dimension for transformer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"<code>ffn_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in feed-forward network.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"<code>ffn_layer = 'mlp'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Type of feed-forward network layer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"<code>ffn_ratio = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Feed-forward network expansion ratio.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"<code>img_size = 224</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Image width and height in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"<code>in_chans = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of input image channels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"<code>mask_k_bias = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to mask K bias in attention.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"<code>n_storage_tokens = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of storage/register tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"<code>num_heads = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of attention heads.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"<code>patch_size = 16</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of each patch in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"<code>pos_embed_rope_base = 100.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Base frequency for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"<code>pos_embed_rope_dtype = 'bf16'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data type for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"<code>pos_embed_rope_max_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"<code>pos_embed_rope_min_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Minimum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"<code>pos_embed_rope_normalize_coords = 'separate'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Coordinate normalization method for RoPE encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"<code>proj_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in output projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"<code>qkv_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in QKV projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"<code>untie_global_and_local_cls_norm = 
False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use separate norms for global and local CLS tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"<code>PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)</code>","text":"<p>               Bases: <code>Module</code></p> <p>2D image to patch embedding: (B,C,H,W) -&gt; (B,N,D)</p> <p>Parameters:</p> Name Type Description Default <code>img_size</code> <code>int | tuple[int, int]</code> <p>Image size.</p> <code>224</code> <code>patch_size</code> <code>int | tuple[int, int]</code> <p>Patch token size.</p> <code>16</code> <code>in_chans</code> <code>int</code> <p>Number of input image channels.</p> <code>3</code> <code>embed_dim</code> <code>int</code> <p>Number of linear projection output channels.</p> <code>768</code> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -&gt; None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/dinov3.py</code> 
<pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, 
n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n</code></pre>"},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"<p>Fake CLIP model for testing with tiny-open-clip-model.</p> <p>This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Tiny model uses 2x2 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for tiny model (8x8 images).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 
images).\"\"\"\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"<code>Dataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"<code>Transformer</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Protocol defining the interface for all Transformer models.</p>"},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"<code>patch_size</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Patch size in pixels (e.g., 14 or 16).</p>"},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"<code>forward(batch)</code>  <code>abstractmethod</code>","text":"<p>Run forward pass on batch of images.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -&gt; Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"<code>get_residuals()</code>  <code>abstractmethod</code>","text":"<p>Return the list of residual blocks/layers for hook registration.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_residuals(self) -&gt; list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"<code>get_token_i(content_tokens_per_example)</code>  
<code>abstractmethod</code>","text":"<p>Return indices for selecting relevant tokens from activations.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -&gt; slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"<code>make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"<code>make_transforms(ckpt, content_tokens_per_example)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.list_families","title":"<code>list_families()</code>","text":"<p>List all ViT family names.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>def list_families() -&gt; list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n</code></pre>"},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"<code>load_model_cls(family)</code>","text":"<p>Load a transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef load_model_cls(family: str) -&gt; type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n</code></pre>"},{"location":"api/data/models/#saev.data.models.register_family","title":"<code>register_family(cls)</code>","text":"<p>Register a new transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        
logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n</code></pre>"},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"<p>Ordered (sequential) dataloader for activation data.</p> <p>This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.</p> <p>Patch labels are provided if there is a labels.bin file on disk.</p> <p>See the design decisions in src/saev/data/performance.md.</p> Usage <p>cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ...     activations = batch[\"act\"]  # [batch_size, d_model] ...     image_indices = batch[\"example_idx\"]  # [batch_size] ...     patch_indices = batch[\"token_idx\"]  # [batch_size] ...     patch_labels = batch[\"patch_labels\"]  # [batch_size]</p>"},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/pe/","title":"saev.data.pe","text":"<p>Perception Encoder (PE) models from Meta (Bolya et al., 2025).</p> <p>PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Both are available via timm.</p>"},{"location":"api/data/pe/#saev.data.pe.Core","title":"<code>Core(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Core: CLIP-style model for language alignment.</p> <p>Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"<code>Spatial(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) 
- vit_pe_spatial_base_patch16_512.fb (B/16, 512px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"<code>IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"<code>IndexedDataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> <p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels 
were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / 
hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"<code>OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"<code>OrderedDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"<code>ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"<code>ShuffledDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = 
None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n          
  self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"<code>make_ordered_config(shuffled_cfg, **overrides)</code>","text":"<p>Create an <code>OrderedConfig</code> from a <code>ShuffledConfig</code>, with optional overrides.</p> <p>Defaults come from <code>shuffled_cfg</code> for fields present in <code>OrderedConfig</code>, and <code>overrides</code> take precedence. Unknown override fields raise <code>TypeError</code> from the <code>OrderedConfig</code> constructor, mirroring <code>dataclasses.replace</code>.</p> Source code in <code>src/saev/data/__init__.py</code> <pre><code>@beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -&gt; OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n</code></pre>"},{"location":"api/data/shards/","title":"saev.data.shards","text":"<p>Library code for reading and writing sharded activations to disk.</p>"},{"location":"api/data/shards/#saev.data.shards.Index","title":"<code>Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard)</code>  <code>dataclass</code>","text":"<p>Attributes:</p> Name Type Description <code>idx</code> <code>int</code> <p>The index of the activation.</p> <code>example_idx</code> <code>int</code> <p>The index of the original example (image, audio clip etc).</p> <code>content_token_idx</code> 
<code>int</code> <p>The token's index within an example's content. -1 for all special tokens.</p> <code>shard_idx</code> <code>int</code> <p>The shard index.</p> <code>example_idx_in_shard</code> <code>int</code> <p>The example index along the examples axis in a shard.</p> <code>token_idx_in_shard</code> <code>int</code> <p>The token index along the tokens axis in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"<code>IndexMap(md, tokens, layer)</code>","text":"<p>Attributes:</p> Name Type Description <code>md</code> <code>Metadata</code> <p>Metadata</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to load.</p> <code>layer</code> <code>int</code> <p>Which layer to load.</p> <code>layer_idx_lookup</code> <code>dict[int, int]</code> <p>The lookup from a transformer layer to the layer idx in the shard.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n          
  return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"<code>LabelsWriter(shards_dir, md)</code>","text":"<p>LabelsWriter handles writing patch-level segmentation labels to a single binary file.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>The shard directory; $SAEV_SCRATCH/saev/shards/ required <code>md</code> <code>Metadata</code> <p>The Metadata object.</p> required <p>Attributes:</p> Name Type Description <code>labels</code> <code>UInt8[ndarray, 'n_examples n_patches']</code> <p>The integer patch labels.</p> <code>labels_path</code> <code>Path</code> <p>Where the integer patch labels are stored.</p> <code>md</code> <code>Metadata</code> 
<p>The dataset metadata.</p> <code>has_written</code> <code>bool</code> <p>Whether we have written any data to <code>self.labels</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"<code>flush()</code>","text":"<p>Flush the memory-mapped file to disk if anything was written.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def flush(self) -&gt; None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"<code>write_batch(batch_labels, start_idx)</code>","text":"<p>Write a batch of labels to the memory-mapped file.</p> <p>Parameters:</p> Name Type Description Default <code>batch_labels</code> <code>ndarray | Tensor</code> <p>Array of shape (batch_size, content_tokens_per_example) with uint8 dtype</p> required <code>start_idx</code> <code>int</code> <p>Starting index in the global labels array</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n  
  Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size &lt;= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> 
<p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example 
including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"<code>RecordedTransformer(model, content_tokens_per_example, cls_token, layers)</code>","text":"<p>               Bases: <code>Module</code></p> <p>A wrapper around a transformer model that records intermediate layer activations during forward passes.</p> <p>Parameters:</p> Name Type Description Default <code>model</code> <code>Module</code> <p>The transformer model to wrap.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether to record the [CLS] token in addition to content tokens.</p> required <code>layers</code> <code>Sequence[int]</code> <p>Which transformer layers to record activations from.</p> required <p>Attributes:</p> Name Type Description <code>model</code> <code>Module</code> <p>The wrapped transformer model.</p> <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per 
example.</p> <code>cls_token</code> <code>bool</code> <p>Whether the [CLS] token is included in recorded activations.</p> <code>layers</code> <code>Sequence[int]</code> <p>Tuple of layer indices being recorded.</p> <code>token_i</code> <code>slice</code> <p>Token indices to extract from model outputs.</p> <code>logger</code> <p>Logger instance for this recorder.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Shard","title":"<code>Shard(name, n_examples)</code>  <code>dataclass</code>","text":"<p>A single shard entry in shards.json, recording the filename and number of examples.</p> <p>Attributes:</p> Name Type Description <code>name</code> <code>str</code> <p>The filename of the shard (e.g., \"acts000000.bin\").</p> <code>n_examples</code> <code>int</code> <p>Number of examples stored in this shard.</p>"},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"<code>ShardInfo(shards=list())</code>  <code>dataclass</code>","text":"<p>A container for shard metadata as recorded in shards.json.</p> <p>Parameters:</p> Name Type Description Default <code>shards</code> <code>list[Shard]</code> <p>A list of Shard objects.</p> <code>list()</code>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"<code>ShardWriter(shards_root, md)</code>","text":"<p>ShardWriter is a 
stateful object that handles sharded activation writing to disk.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>The $SAEV_SCRATCH/saev/shards path.</p> required <code>md</code> <code>Metadata</code> <p>The Metadata object for these shards.</p> required <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>The  $SAEV_SCRATCH/saev/shards/. <code>shard</code> <code>int</code> <code>acts_path</code> <code>Path</code> <code>acts</code> <code>Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None</code> <code>filled</code> <code>int</code> <code>labels_writer</code> <code>LabelsWriter</code> <p>The LabelsWriter writer.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"<code>__enter__()</code>","text":"<p>Context manager entry.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"<code>__exit__(exc_type, exc_val, exc_tb)</code>","text":"<p>Context manager exit - handle cleanup.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle 
cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"<code>write_batch(activations, start_idx, patch_labels=None)</code>","text":"<p>Write a batch of activations and (optionally) patch labels.</p> <p>Parameters:</p> Name Type Description Default <code>activations</code> <code>Float[Tensor, 'batch n_layers all_patches d_model']</code> <p>Batch of activations to write.</p> required <code>start_idx</code> <code>int</code> <p>Starting index for this batch.</p> required <code>patch_labels</code> <code>UInt8[Tensor, 'batch n_patches'] | None</code> <p>Optional patch labels for segmentation datasets.</p> <code>None</code> Source code in <code>src/saev/data/shards.py</code> <pre><code>def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -&gt; None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx &gt;= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. 
Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit &lt; batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 &lt;= {start_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        assert 0 &lt;= start_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 &lt;= {end_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 &lt;= end_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not 
isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"<code>get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)</code>","text":"<p>Get a dataloader for a default map-style dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data</code> <code>Config</code> <p>Config for the dataset.</p> required <code>batch_size</code> <code>int</code> <p>Batch size.</p> required <code>n_workers</code> <code>int</code> <p>Number of dataloader workers.</p> required <code>data_tr</code> <code>Callable | None</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> <code>None</code> <code>mask_tr</code> <code>Callable | None</code> <p>Transform to be applied to masks.</p> <code>None</code> <code>sample_tr</code> <code>Callable | None</code> <p>Transform to be applied to the entire sample dict.</p> <code>None</code> <p>Returns:</p> Type Description <code>DataLoader</code> <p>A PyTorch Dataloader that yields dictionaries with <code>'data'</code> keys containing data batches, <code>'index'</code> keys containing original dataset indices and <code>'label'</code> keys containing label batches.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -&gt; torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the 
raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers &gt; 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"<code>pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)</code>","text":"<p>Convert pixel-level segmentation to patch-level labels using vectorized operations.</p> <p>Parameters:</p> Name Type Description Default <code>seg</code> <code>Image</code> <p>Pixel-level segmentation mask as PIL Image</p> required <code>n_patches</code> <code>int</code> <p>Total number of patches expected</p> required <code>patch_size</code> <code>int</code> <p>Size of each patch in pixels</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>How to aggregate pixel labels into patch labels</p> <code>MAJORITY</code> <code>bg_label</code> <code>int</code> <p>Background label index</p> <code>0</code> <code>max_classes</code> <code>int</code> <p>Maximum number of classes (for bincount)</p> <code>256</code> <p>Returns:</p> Type Description <code>UInt8[Tensor, ' n_patches']</code> <p>Patch labels as uint8 tensor of shape (n_patches,)</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n   
 seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -&gt; UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -&gt; (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        n_patches, max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        
# Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) &gt; 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"<code>worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>family</code> <code>str</code> <p>Transformer family (dinov2, dinov3, clip, etc).</p> required <code>ckpt</code> <code>str</code> <p>Transformer ckpt (hf-hub:imageomics/bioclip2, etc).</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token.</p> required <code>d_model</code> <code>int</code> <p>Hidden dimension of transformer.</p> required <code>layers</code> <code>list[int]</code> <p>The layers to record activations for.</p> required <code>data</code> <code>Config</code> <p>Config for the particular (image) dataset to load.</p> required <code>batch_size</code> <code>int</code> <p>Batch size for the dataset.</p> required <code>n_workers</code> <code>int</code> <p>Number of workers for loading examples fromm the dataset.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>Maximum number of tokens per disk shard.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>Optional method 
for aggregating segmentation label pixels.</p> <code>MAJORITY</code> <code>shards_root</code> <code>Path</code> <p>Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.</p> required <code>device</code> <code>str</code> <p>Device for doing the computation.</p> required <p>Returns:</p> Type Description <code>Path</code> <p>Path to the shards directory.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -&gt; pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n</code></pre>"},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    
self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            
self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"<code>conv2d_to_tokens(x_bchw, conv)</code>","text":"<p>Conv2d then flatten spatial to L, return (B, L, D).</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -&gt; 
b (h w) d\")\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"<code>resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)</code>","text":"<p>Resize image to (w, h) so that:   - w % p == 0, h % p == 0   - (h/p) * (w/p) == N   - Minimizes change in aspect ratio.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -&gt; Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p &lt;= 0 or n &lt;= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist &lt; best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"<code>unfolded_conv2d(x_bchw, conv)</code>","text":"<p>Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. 
Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -&gt; b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n</code></pre>"},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"<p>Script for dumping SAE inference artifacts in a single pass over the dataset.</p> <p>Default mode writes 5 files:</p> <ol> <li>mean_values.pt</li> <li>sparsity.pt</li> <li>distributions.pt</li> <li>token_acts.npz</li> <li>metrics.json</li> </ol> <p>If save=False, only metrics.json is written.</p> <p>metrics.json is serialized from <code>saev.metrics.Metrics</code>.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"<code>Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for computing image 
activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"<code>data = OrderedConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data configuration</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which accelerator to use.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"<code>force_recompute = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Force recomputation even if files exist.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which token labels to ignore when calculating summarized image activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"<code>mem_gb = 80</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"<code>n_dists = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features to save distributions for.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"<code>n_hours = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in 
hours.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"<code>run = pathlib.Path('./runs/abcdefg')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Path to the sae.pt file.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"<code>save = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to write token_acts/statistics files. If False, only metrics.json is written.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. Empty means to not use Slurm.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/inference/#saev.framework.inference.main","title":"<code>main(cfg, sweep=None)</code>","text":"<p>Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config inference.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> Source code in <code>src/saev/framework/inference.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n     
   sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 
0\n</code></pre>"},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"<p>Submitit entrypoint modules for SAE workflows.</p> <p><code>saev.framework</code> is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.</p>"},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"<p>To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.</p> <p>This script handles that additional complexity.</p> <p>Conceptually, activations are either thought of as</p> <ol> <li>A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset</li> <li>Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.</li> </ol>"},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"<code>Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs')</code>  <code>dataclass</code>","text":"<p>Configuration for calculating and saving ViT activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"<code>batch_size = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size for ViT inference.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"<code>ckpt = 'ViT-L-14/openai'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Specific model 
checkpoint.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"<code>cls_token = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the model has a [CLS] token.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"<code>content_tokens_per_example = 256</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per example (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dimension of the ViT activations (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"<code>data = dataclasses.field(default_factory=(datasets.Imagenet))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which dataset to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which device to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"<code>family = 'clip'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which model family.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"<code>layers = dataclasses.field(default_factory=(lambda: [-2]))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which layers to save. 
By default, the second-to-last layer.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"<code>log_to = './logs'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"<code>max_tokens_per_shard = 2400000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"<code>n_workers = 8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloader workers.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"<code>shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to write shards.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"<code>ssl = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use 
SSL.</p>"},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"<code>cli(cfg)</code>","text":"<p>Save ViT activations for use later on.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Configuration for activations.</p> required Source code in <code>src/saev/framework/shards.py</code> <pre><code>@beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        
job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n</code></pre>"},{"location":"api/framework/train/","title":"saev.framework.train","text":"<p>Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.</p> <p>Checklist for making sure your training doesn't suck:</p> <ul> <li>[ ] Data scaling: scale vectors so their average L2 norm is sqrt(n).</li> <li>[ ] Initialize b_e such that each feature activates 10K * d_model / (n * d_sae) of the time, which means that on average, each example activates 10K features.</li> <li>[x] Initialize b_d to 0.</li> <li>[x] Sweep learning rate and sparsity coefficients.</li> <li>[ ] Decay learning rate to 0 over the last 20% of training.</li> <li>[ ] Warmup sparsity over all of training.</li> <li>[x] Gradient clipping (clip at 1 with clip_grad_norm)</li> <li>[x] Track dead latents through training</li> </ul>"},{"location":"api/framework/train/#saev.framework.train.Config","title":"<code>Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for training a sparse autoencoder on a vision transformer.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Hardware 
device.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"<code>grad_clip = 1.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum gradient norm across all SAE parameters.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"<code>log_every = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How often to log to WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"<code>lr = 0.0004</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Learning rate.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"<code>mem_gb = 128</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in hours.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"<code>n_lr_warmup = 500</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of learning rate warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"<code>n_sparsity_warmup = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of sparsity coefficient warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"<code>n_train = 100000000</code>  <code>class-attribute</code> 
<code>instance-attribute</code>","text":"<p>Number of SAE training samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"<code>n_val = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of SAE evaluation samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"<code>objective = nn.objectives.Matryoshka()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE objective configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"<code>optim = 'adam'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Optimizer for training.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"<code>runs_root = pathlib.Path('$SAEV_NFS/saev/runs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory for runs.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"<code>sae = nn.SparseAutoencoderConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"<code>seed = 42</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. 
Empty means to not use Slurm.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"<code>tags = ()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tags to add to WandB run.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"<code>track = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to track with WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"<code>train_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Training data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"<code>val_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Validation data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"<code>wandb_project = 'saev'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>WandB project name.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"<code>EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold)</code>  <code>dataclass</code>","text":"<p>Results of evaluating a trained SAE on a dataset.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"<code>almost_dead_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for an \"almost dead\" 
neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"<code>dense_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for a dense neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"<code>freqs</code>  <code>instance-attribute</code>","text":"<p>How often each feature fired.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Mean L0 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Mean L1 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"<code>mean_values</code>  <code>instance-attribute</code>","text":"<p>The mean value for each feature when it did fire.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Mean MSE across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"<code>n_almost_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on fewer than <code>almost_dead_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that never fired on any example.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"<code>n_dense</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on more than <code>dense_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"<code>normalized_mse</code>  
<code>instance-attribute</code>","text":"<p>Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"<code>sse_baseline</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the mean baseline.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"<code>sse_sae</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the SAE.</p>"},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"<code>evaluate(cfgs, saes, objectives)</code>","text":"<p>Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.</p> <p>The metrics computed are mean <code>L0</code>/<code>L1</code>/<code>MSE</code> losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of <code>EvalMetrics</code> is returned, one for each SAE.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -&gt; list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  
A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x &gt; 0, \"batch d_sae -&gt; d_sae\", \"sum\"\n            ).cpu()\n            values[i] += 
einops.reduce(fwd.f_x, \"batch d_sae -&gt; d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens &gt; 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline &gt; 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs &lt; almost_dead_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs &gt; dense_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                
dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.main","title":"<code>main(cfg, sweep=None, max_parallel=None)</code>","text":"<p>Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config for training an SAE.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> <code>max_parallel</code> <code>int | None</code> <p>Maximum SAEs to train concurrently within a single worker.</p> <code>None</code> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 
019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs 
done.\")\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"<code>split_cfgs(cfgs)</code>","text":"<p>Splits configs into groups that can be parallelized.</p> <p>Parameters:</p> Name Type Description Default <code>cfgs</code> <code>list[Config]</code> <p>A list of configs from a sweep file.</p> required <p>Returns:</p> Type Description <code>list[list[Config]]</code> <p>A list of lists, where the configs in each sublist do not differ in any keys that are in <code>CANNOT_PARALLELIZE</code>. This means that each sublist is a valid \"parallel\" set of configs for <code>train</code>.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -&gt; list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. 
This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.train","title":"<code>train(cfgs)</code>","text":"<p>Explicitly declare the optimizer, schedulers, dataloader, etc outside of <code>main</code> so that all the variables are dropped from scope and can be garbage collected.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -&gt; tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n 
   )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            
tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                
metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch &gt; 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse &gt; 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() &gt; 1e-12).sum(0) == 
0).float().mean()\n\n                    # Dictionary coherence: max |&lt;w_i, w_j&gt;| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        \"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, 
scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n</code></pre>"},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"<p>Neural network architectures for sparse autoencoders.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"<code>AuxK(key='auxk', k_aux=512, alpha=1 / 32)</code>  <code>dataclass</code>","text":"<p>AuxK auxiliary reconstruction loss for dead latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"<code>BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero per sample in the batch.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"<code>BatchTopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>BatchTopK activation and inference-time threshold for sparse autoencoders.</p> <p>This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.</p> <p>Training mode (model.train()):     Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. 
This enforces an average of exactly <code>top_k</code> active features per example while allowing the \"activation budget\" to move between examples in the batch.</p> <pre><code>During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n</code></pre> <p>Eval mode (model.eval()):     At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:</p> <pre><code>    y = x if x &gt; theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n</code></pre> Inputs <p>x: Tensor of shape [batch, d_sae] containing pre-activation codes.</p> Outputs <p>Tensor of shape [batch, d_sae] with the same dtype and device as x, where either:     - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or     - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to each sample in the batch.</p> Source code in 
<code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold &lt;= 0:\n            return torch.where(x &gt; 0, x, torch.zeros_like(x))\n\n        return torch.where(x &gt; self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x &gt; 0]\n        if pos.numel() &gt;= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"<code>NoAux(key='no-aux')</code>  <code>dataclass</code>","text":"<p>No auxiliary loss (e.g., for ReLU).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"<code>NoSparsity(key='no-sparsity')</code>  <code>dataclass</code>","text":"<p>No explicit sparsity penalty (e.g. 
for TopK/BatchTopK where k controls sparsity).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"<code>Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux())</code>  <code>dataclass</code>","text":"<p>Vanilla ReLU</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. 
That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    
device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model 
-&gt; d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"<code>TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"<code>TopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Top-K activation function. 
For use as activation function of sparse encoder.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to the input tensor.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": 
SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"<code>Loss()</code>  <code>dataclass</code>","text":"<p>The loss term for an autoencoder training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"<code>Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000)</code>  <code>dataclass</code>","text":"<p>Config for the Matryoshka loss for another arbitrary SAE class.</p> <p>Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"<code>dead_threshold_tokens = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tokens without activation before a latent is considered 
dead.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"<code>n_prefixes = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of random length prefixes to use for loss calculation.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"<code>MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>Loss</code></p> <p>The composite loss terms for an training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"<code>aux</code>  <code>instance-attribute</code>","text":"<p>Auxiliary loss term (e.g., AuxK).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Sum of L0 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Sum of L1 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Average of reconstruction loss (mean squared error) for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of dead latents (per aux loss threshold).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"<code>sparsity</code>  <code>instance-attribute</code>","text":"<p>Sparsity loss, typically lambda * 
L1.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"<code>MatryoshkaObjective(cfg)</code>","text":"<p>               Bases: <code>Objective</code></p> <p>Torch module for calculating the matryoshka loss for an SAE.</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n</code></pre>"},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"<code>sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)</code>","text":"<p>Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)</p> <p>Parameters:</p> Name Type Description Default <code>d_sae</code> <code>int</code> <p>Total number of latent dimensions</p> required <code>n_prefixes</code> <code>int</code> <p>Number of prefixes to sample</p> required <code>min_prefix_length</code> <code>int</code> <p>Minimum length of any prefix</p> <code>1</code> <code>pareto_power</code> <code>float</code> <p>Power parameter for Pareto distribution (lower = more uniform)</p> <code>0.5</code> <p>Returns:</p> Type Description <code>Int64[Tensor, ' n_prefixes']</code> <p>torch.Tensor: Sorted prefix lengths</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>@torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -&gt; Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. 
Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes &lt;= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes &lt;= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n</code></pre>"},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = 
torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths 
for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -&gt; d_sae\",\n    )\n\n    
norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with 
custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"<code>DataloaderMonitor(dataloader, process_factory=None)</code>","text":"<p>Tracks IO and CPU activity for the dataloader manager process and its children.</p> <p>The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call <code>compute()</code> whenever metrics are needed.</p> Source code in <code>src/saev/utils/monitoring.py</code> <pre><code>def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -&gt; None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n</code></pre>"},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"<code>BatchLimiter(dataloader, 
n_samples)</code>","text":"<p>Limits the number of batches to only return <code>n_samples</code> total samples.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"<code>__getattr__(name)</code>","text":"<p>Pass through attribute access to the wrapped dataloader.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __getattr__(self, name: str) -&gt; Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"<code>Warmup(init, final, n_steps)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly increases from <code>init</code> to <code>final</code> over <code>n_warmup_steps</code> steps.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"<code>WarmupCosine(init, n_warmup, peak, n_steps, final)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly 
increases from <code>init</code> to <code>peak</code> over <code>n_warmup</code> steps, then decrease down to final using cosine decay over n_steps - n_warmup.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"<code>PercentileEstimator(percentile, total, lr=0.001, shape=())</code>","text":"Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"<code>update(x)</code>","text":"<p>Update the estimator with a new value.</p> <p>This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>float | Tensor</code> <p>The new value to incorporate into the estimation</p> required Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. 
When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"<code>calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)</code>","text":"<p>Compute entropy and coverage metrics for a batch of shuffled indices.</p> <p>The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.</p> Source code in <code>src/saev/utils/statistics.py</code> <pre><code>@beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -&gt; dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples &lt;= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example &lt;= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n  
      raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n</code></pre>"},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"<code>ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')</code>","text":"<p>Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387</p> Source code in <code>src/saev/utils/wandb.py</code> <pre><code>def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project, config=cfg, mode=mode, tags=tags, dir=dir\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n</code></pre>"},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"<pre><code>docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       
# Other markdown pages, images and other files.\n</code></pre>"},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"<p>Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.</p> <p>Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:</p> <ol> <li>Select \\(n\\) random data points from your training data.</li> <li>Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).</li> <li>Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.</li> <li>Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.</li> <li>Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).</li> </ol> <p>Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).</p>"},{"location":"developers/disk-layout/","title":"Storage &amp; Run Manifest Spec (v1)","text":"<p>There are two main locations:</p> <ol> <li><code>$SAEV_SCRATCH/saev/shards</code>: where we store transformer activations (referred to as <code>shards_root</code> in the codebase).</li> <li><code>$SAEV_NFS/saev/runs</code>: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. 
(referred to as <code>runs_root</code> in the codebase).</li> </ol> <p>Visually, these are:</p> <pre><code>$SAEV_SCRATCH/saev/\n  shards/\n    &lt;shard_hash&gt;/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n</code></pre> <p>and</p> <pre><code>$SAEV_NFS/saev/\n  runs/\n    &lt;run_id&gt;/\n      checkpoint/           # output of train.py on &lt;shard_hash&gt;\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/&lt;shard_hash&gt;\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/&lt;shard_hash&gt;\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        &lt;shard_hash&gt;/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n</code></pre> <p>Each <code>$SAEV_SCRATCH/shards/&lt;shard_hash&gt;/</code> MUST include:</p> <ul> <li><code>metadata.json</code> (UTF-8, canonical spec; see <code>protocol.md</code>)</li> <li><code>shards.json</code> (UTF-8, shard index and sizes; see <code>protocol.md</code>)</li> <li><code>acts*.bin</code> (binary shards; format in <code>protocol.md</code>)</li> <li><code>labels.bin</code> (binary patch labels aligned to shards; format in <code>protocol.md</code>)</li> </ul> <p>Note</p> <p>Immutability: Files under <code>saev/shards/&lt;shard_hash&gt;/</code> MUST be treated as read-only after publication. Any change yields a new <code>shard_hash</code>.</p> <p>All CLI entrypoints should accept a single <code>--run &lt;path&gt;</code> argument. 
Every other path MUST be resolved from the run root:</p> <ul> <li>ViT activations: <code>links/shards</code> \u2192 <code>saev/shards/&lt;shard_hash&gt;</code></li> <li>Dataset: <code>links/dataset</code> \u2192 Dataset root, wherever it is on disk.</li> <li>SAE checkpoint: <code>checkpoint/sae.pt</code></li> </ul> <p>Example resolution:</p> <pre><code>run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n</code></pre> <ul> <li><code>$SAEV_SCRATCH</code> and <code>$SAEV_NFS</code> should be set for all users/processes running saev tools.</li> </ul>"},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":"<ul> <li> <p>Where do patch labels live? Next to <code>acts*.bin</code> in <code>$SAEV_SCRATCH/shards/&lt;shard_hash&gt;/labels.bin</code>. Scripts discover them via <code>links/shards/labels.bin</code>.</p> </li> <li> <p>Can I put datasets directly in <code>$SAEV_SCRATCH</code>? Sure, but not in <code>$SAEV_SCRATCH/shards</code>.</p> </li> </ul>"},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"<p>saev caches activations to disk rather than running ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). <code>saev.data</code> has a specific protocol to support this on OSC, a supercomputer center, and to take advantage of OSC's specific disk performance. 
</p> <p>Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:</p> <ul> <li>mem-mappable</li> <li>Parameterized solely by the experiment configuration (<code>scripts/shards.py:Config</code>)</li> <li>Referenced by a content-hash, so identical configs collide, divergent ones never do</li> <li>Can be read quickly in a random order for training, and can be read (slowly) with random-access for visuals.</li> </ul> <p>This document is the single normative source. Any divergence in code is a bug.</p>"},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"<pre><code>&lt;dump_to&gt;/&lt;HASH&gt;/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n</code></pre> <p><code>HASH</code> = <code>sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8'))</code> Guards against silent config drift.</p>"},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. <code>metadata.json</code>","text":"field type semantic <code>family</code> string <code>\"clip\" \\| \"siglip\" \\| \"dinov2\"</code> <code>ckpt</code> string model identifier (OpenCLIP, HF, etc.) 
<code>layers</code> int[] ViT residual\u2010block indices recorded <code>patches_per_ex</code> int example patches only (excludes CLS) <code>cls_token</code> bool <code>true</code> -&gt; patch 0 is CLS, else no CLS <code>d_model</code> int activation dimensionality <code>n_examples</code> int total examples in dataset <code>patches_per_shard</code> int logical activations per shard (see #3) <code>data</code> object opaque dataset description <code>dataset</code> string absolute path to original dataset root <code>dtype</code> string numpy dtype. Fixed <code>\"float32\"</code> for now. <code>protocol</code> string <code>\"2.1\"</code> (shards after big refactor) <p>The <code>data</code> object is <code>base64.b64encode(pickle.dumps(img_ds)).decode('utf8')</code>.</p> <p>The <code>dataset</code> field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.</p>"},{"location":"developers/protocol/#22-shardsjson","title":"2.2. <code>shards.json</code>","text":"<p>A single array of <code>shard</code> objects, each of which has the following fields:</p> field type semantic name string shard filename (<code>acts000000.bin</code>). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. 
Shard sizing maths","text":"<pre><code>tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard, len(layers), tokens_per_ex, d_model,\n)\n</code></pre> <p><code>patches_per_shard</code> is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ <code>d_model = 1024</code>.</p> <p>The last shard will have a smaller value for <code>examples_per_shard</code>; this value is documented in <code>n_examples</code> in <code>shards.json</code></p>"},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"<p>The entire dataset of activations is treated as a single logical 4D tensor with the shape <code>(n_examples, len(layers), tokens_per_ex, d_model)</code>. This logical tensor is C-contiguous with axes ordered <code>[Example, Layer, Token, Dimension]</code>.</p> <p>Physically, this tensor is split along the first axis (<code>Example</code>) into multiple shards, where each shard is a single binary file. 
The number of examples in each shard is constant, except for the final shard, which may be smaller.</p> <p>To locate an arbitrary activation vector, a reader must convert a logical coordinate (<code>global_ex_idx</code>, <code>layer</code>, <code>token_idx</code>) into a file path and an offset within that file.</p>"},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"<p>Let the parameters from <code>metadata.json</code> be:</p> <ul> <li>L = <code>len(layers)</code></li> <li>P = <code>patches_per_ex</code></li> <li>T = <code>P + (1 if cls_token else 0)</code> (Total tokens per example)</li> <li>D = <code>d_model</code></li> <li>S = <code>examples_per_shard</code> from Section 3 (shard sizing); this equals <code>n_examples</code> in <code>shards.json</code> for every shard except possibly the last, which may be smaller.</li> </ul>"},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"<p>Given a logical coordinate:</p> <ul> <li><code>global_ex_idx</code>: integer, with <code>0 &lt;= global_ex_idx &lt; n_examples</code></li> <li><code>layer</code>: integer, must be an element of <code>layers</code></li> <li><code>token_idx</code>: integer, <code>0 &lt;= token_idx &lt; T</code></li> </ul> <p>The physical location is found as follows:</p> <ol> <li> <p>Identify Shard:</p> <ul> <li><code>shard_idx = global_ex_idx // S</code></li> <li><code>ex_in_shard = global_ex_idx % S</code> The target file is <code>acts{shard_idx:06d}.bin</code>.</li> </ul> </li> <li> <p>Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical <code>layer</code> must be mapped to its index in the stored <code>layers</code> array.</p> <ul> <li><code>layer_idx = layers.index(layer)</code> A reader must raise an error if <code>layer</code> is not in <code>layers</code>.</li> </ul> </li> <li> <p>Calculate Offset: The data within a shard is a 4D tensor of shape <code>(S, L, T, D)</code>. 
The offset to the first byte of the desired activation vector <code>[ex_in_shard, layer_idx , token_idx]</code> is:</p> <ul> <li><code>offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx</code></li> <li><code>offset_in_bytes = offset_in_vectors * D * 4</code> (assuming 4 bytes for <code>float32</code>)</li> </ul> </li> </ol> <p>A reader can then seek to <code>offset_in_bytes</code> and read \\(D \\times 4\\) bytes to retrieve the vector.</p> <p>Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.</p>"},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"<p>The <code>token</code> axis of length \\(T\\) is ordered as follows: * If <code>cls_token</code> is <code>true</code>:     * Index <code>0</code>: [CLS] token activation     * Indices <code>1</code> to \\(P\\): Patch token activations * If <code>cls_token</code> is <code>false</code>:     * Indices <code>0</code> to \\(P-1\\): Patch token activations</p> <p>The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.</p>"},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning &amp; compatibility","text":"<ul> <li>Major changes (shape reorder, dtype switch, new required JSON keys) increment the major protocol version number at the top of this document and must emit a breaking warning in loader code.</li> <li>Minor, backward-compatible additions (new optional JSON key) merely update this doc and the minor protocol version number.</li> </ul> <p>That's it. 
Anything else you find in code that contradicts this document, fix the code or update the spec.</p>"},{"location":"developers/workflows/","title":"Workflows","text":"<ol> <li>Generate inference activations (and thus visuals) for both training and validation splits.</li> </ol>"},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"<p>This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (<code>block.norm2</code>) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.</p>"},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"<p>While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.</p>"},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"<p>First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.</p> <p></p> <p>I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.</p> <p></p>"},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"<p>Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. 
Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.</p> <p></p> <p>My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:</p> <pre><code>bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n</code></pre> <p>Something is broken inside of BirdMAE.</p>"},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"<p>Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.</p> <p></p> <p>BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.</p> <p></p> <p>Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.</p>"},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"<p>Below is the output from the attention layers (Graph #2) in our architecture diagram.</p> <p></p> <p>Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.</p> <p></p> <p>Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. 
Why?</p> <p>Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.</p> <p></p> <p><code>fc2</code> has a <code>weight</code> parameter with shape (4096, 1024) and a <code>bias</code> parameter with shape (1024,). I take the L2 norm of <code>fc2.weight</code>'s columns to see if col 296/1024 is different.</p> <p></p> <p><code>fc2.weight</code> does appear to be different, and abnormally large (note the log scale). <code>fc2.bias</code> is also different, but it's not immediately obvious what's going on there to me.</p>"},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"<p>This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP <code>LayerNorm</code>s learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.</p> <p>We verified this by inspecting <code>norm2.weight</code> across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.</p> <p>The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.</p>"},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"<p>The fix is to record activations after <code>block.norm2</code> (the pre-MLP LayerNorm) instead of from the raw residual stream. 
In <code>saev</code>, this is implemented as:</p> <pre><code>def get_residuals(self) -&gt; list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n</code></pre> <p>After this change, the outlier is suppressed and SAE training works normally.</p>"},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"<ol> <li>Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.</li> <li>Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.</li> <li>Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.</li> </ol>"},{"location":"users/glossary/","title":"Glossary","text":"<p>Definitions for words used in the code and documentation.</p> <ul> <li>example: one dataset item (image, sentence, audio clip, point cloud, graph instance).</li> <li>token: one model position in the encoder\u2019s residual stream (the thing with hidden size <code>d_model</code>). Always \"token\" inside the model.</li> <li>content token: tokens derived from the raw input (image patches, wordpieces, audio windows, nodes, etc.).</li> <li>special token: tokens not directly derived from the raw input (class/summary token, [SEP], [MASK], [PAD], register tokens, etc.).</li> <li>sequence length L: total tokens per example (content + special). If variable, call it \u201cragged\u201d.</li> <li>layer: an integer index into the encoder\u2019s stack.</li> <li>activation kind (optional but useful): which stream you saved (e.g., resid_pre, resid_post, mlp_out, attn_out, qkv, head_out).</li> </ul> <p>Modality-specific vocab:</p> <ul> <li>patch (vision): a 2D content token. 
Often laid out on a grid with shape (H_patches, W_patches).</li> <li>frame/token or tube (video): content token in time \u00d7 space; often (T, H, W).</li> <li>wordpiece / subword (text): content token from a tokenizer.</li> <li>window / frame (audio): time\u2013frequency window.</li> <li>node (graph), point (point cloud).</li> </ul>"},{"location":"users/guide/","title":"Guide","text":"<p>This guide explains how to transition from the ADE20K demo to using <code>saev</code> with your own custom datasets.</p> <p>Here are the steps:</p> <ol> <li>Save ViT activations to disk</li> <li>Train SAEs on activations</li> <li>Evaluate the SAE checkpoints</li> <li>Visualize Learned Features</li> </ol> <p>Note</p> <p><code>saev</code> assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with <code>CUDA_VISIBLE_DEVICES=X</code> to run on GPU X.</p>"},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"<p>To save activations to disk, we need to specify:</p> <ol> <li>Which model we would like to use</li> <li>Which layers we would like to save.</li> <li>Where on disk and how we would like to save activations.</li> <li>Which images we want to save activations for.</li> </ol> <p>The <code>saev/framework/shards.py</code> script does all of this for us.</p> <p>Run <code>uv run launch.py shards --help</code> to see all the configuration.</p> <p>In practice, you might run:</p> <pre><code>uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n</code></pre> <p>This will save activations for the CLIP-pretrained model ViT-B/16, 
which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory <code>/fs/scratch/PAS2136/samuelstevens/saev/shards</code>.</p> <p>Note</p> <p>A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which take up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L has 1024 dimensions, 14x14 patches are 224 patches/layer/image), recorded activations will grow even larger.</p> <p>This script will also save a <code>metadata.json</code> file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in <code>.bin</code> files, numbered starting from 000000.</p> <p>To add your own models, see the guide to extending in <code>saev.activations</code>.</p>"},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"<p>To train an SAE, we need to specify:</p> <ol> <li>Which activations to use as input.</li> <li>SAE architectural stuff.</li> <li>Optimization-related stuff.</li> </ol> <p>The <code>train.py</code> script handles this.</p> <p>Run <code>uv run train.py --help</code> to see all the configuration.</p> <p>The most important options are:</p> <ul> <li><code>--runs-root</code>: where to store runs.</li> <li><code>--train-data</code> and <code>--val-data</code>: How to load the training and validation data. 
You probably want to specify both <code>--{train,val}-data.shards</code> (the shard directory) and <code>--{train,val}-data.layer</code> (which layer to use).</li> <li><code>sae.activation</code>: <code>sae.activation:relu</code> to use the ReLU activation.</li> </ul> <p>This is a full example:</p> <pre><code>uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n</code></pre> <p>This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.</p>"},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"<p>The training loop logs additional loader diagnostics derived from <code>calc_batch_entropy</code> in <code>train.py</code>. Every batch contributes two entropy measurements in natural log units:</p> <ul> <li><code>loader/example_entropy</code> and <code>loader/example_entropy_normalized</code> summarize how evenly the shuffled loader samples example indices. 
Normalization divides the raw entropy by <code>ln(metadata.n_examples)</code> so perfectly uniform sampling is 1.0.</li> <li><code>loader/token_entropy</code> and <code>loader/token_entropy_normalized</code> do the same for patch indices using <code>ln(metadata.content_tokens_per_example)</code> as the normalizer.</li> <li><code>loader/example_coverage</code> and <code>loader/token_coverage</code> report the fraction of distinct example or patch indices seen in the current batch relative to their theoretical support.</li> </ul> <p>All six metrics appear alongside the existing <code>loader/read_mb</code> counters, helping spot skewed sampling or under-covered patches mid-run.</p>"},{"location":"users/guide/#evaluation","title":"Evaluation","text":"<p>After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch <code>torch.nn.Module</code> in combination with a <code>saev.data.OrderedDataLoader</code> or <code>saev.data.IndexedDataset</code>.</p> <p>However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The <code>saev/framework/inference.py</code> script calculates these metrics. You can run <code>uv run launch.py inference --help</code> to see all the options.</p> <p>The most important options are:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--data</code>: The options for the OrderedDataLoader. Specifically, you need to set <code>--data.shards</code> and <code>--data.layer</code>, just like for training.</li> </ul> <pre><code>uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n</code></pre>"},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"<p>Now that you've trained an SAE, you probably want to look at its learned features. 
One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse <code>token_acts.npz</code> file from the previous inference step.</p> <p>Warning</p> <p>Because there are so many different ways to visualize SAE features, I moved it to <code>contrib/trait_discovery</code> (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").</p> <p>The most important options:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--shards</code>: The shards directory.</li> <li><code>--latents</code>: The 0-indexed latents to save images for.</li> <li><code>--n-latents</code>: The number of randomly selected latents to save images for.</li> </ul> <p>So first, move into the <code>contrib/trait_discovery</code> directory:</p> <pre><code>cd contrib/trait_discovery\n</code></pre> <p>Then run the script that generates highlighted images:</p> <pre><code>uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n</code></pre> <p>Note</p> <p>Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (<code>--include-latents</code>). 
I recommend using <code>saev/interactive/metrics.py</code> with marimo to figure out good thresholds.</p>"},{"location":"users/guide/#sweeps","title":"Sweeps","text":"<p>tl;dr: basically the slow part of training SAEs is loading vit activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.</p>"},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"<p>SAE training optimizes for a unique bottleneck compared to typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.</p> <p>A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.</p>"},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"<p>To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:</p> <pre>\nflowchart TD\n    A[Pre-computed ViT Activations] --&gt;|Slow I/O| B[Memory Buffer]\n    B --&gt;|Shared Batch| C[SAE Model 1]\n    B --&gt;|Shared Batch| D[SAE Model 2]\n    B --&gt;|Shared Batch| E[SAE Model 3]\n    B --&gt;|Shared Batch| F[...]\n</pre> <p>This approach:</p> <ul> <li>Loads each batch of activations once from disk</li> <li>Uses that same batch for multiple SAE models with different hyperparameters</li> <li>Amortizes the slow I/O cost across all models in the sweep</li> </ul>"},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"<p>The <code>train</code> command accepts a <code>--sweep</code> parameter that points 
to a TOML file defining the hyperparameter grid:</p> <pre><code>uv run python -m saev train --sweep configs/my_sweep.toml\n</code></pre> <p>Here's an example sweep configuration file:</p> <pre><code>[sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n</code></pre> <p>This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.</p>"},{"location":"users/guide/#limitations","title":"Limitations","text":"<p>Not all parameters can be swept in parallel. Parameters that affect data loading (like <code>batch_size</code> or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.</p>"},{"location":"users/inference/","title":"Inference","text":"<p>If you want to get started quickly, try the inference notebook in marimo or on Google Colab.</p> <p>Briefly, you need to:</p> <ol> <li>Download a checkpoint.</li> <li>Get the code.</li> <li>Load the checkpoint.</li> <li>Get activations.</li> </ol> <p>Details are below.</p>"},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"<p>First, download an SAE checkpoint from the Huggingface collection.</p>"},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"<p>Some repos (CLIP, BioCLIP, DINOv2) contain a single <code>sae.pt</code> at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.</p> <p>You can use <code>wget</code> if you want:</p> <pre><code>wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n</code></pre>"},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"<p>The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. 
Each repo has a <code>manifest.jsonl</code> with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.</p> <p>Download a specific checkpoint:</p> <pre><code>from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n</code></pre> <p>Download all checkpoints in a repo:</p> <pre><code>from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n</code></pre> <p>Available DINOv3 repos:</p> <ul> <li>osunlp/SAE_DINOv3_ViT-S-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-B-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-L-16_IN1K (layers 13-23)</li> <li>osunlp/SAE_DINOv3_TopK_ViT-L-16_IN1K (layers 13-23)</li> </ul>"},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"<p>The easiest way to do this is to clone the code:</p> <pre><code>git clone https://github.com/OSU-NLP-Group/saev\n</code></pre> <p>You can also install the package from git if you use uv (not sure about pip or cuda):</p> <pre><code>uv add git+https://github.com/OSU-NLP-Group/saev\n</code></pre> <p>Or clone it and install it as an editable package with pip, like <code>pip install -e .</code> in your virtual environment.</p> <p>Then you can do things like <code>from saev import ...</code>.</p> <p>Note</p> <p>If you struggle to get <code>saev</code> installed, open an issue on GitHub and I will figure out how to make it easier.</p>"},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"<pre><code>import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n</code></pre> <p>Now you have a pretrained SAE.</p>"},{"location":"users/inference/#get-activations","title":"Get Activations","text":"<p>This is the hardest part. 
We need to:</p> <ol> <li>Pass an image into a ViT</li> <li>Record the dense ViT activations at the same layer that the SAE was trained on.</li> <li>Pass the activations into the SAE to get sparse activations.</li> <li>Do something interesting with the sparse SAE activations.</li> </ol> <p>There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the <code>get_sae_latents()</code> functions in both files.</p> <p>Below is example code to do it using the <code>saev</code> package.</p> <pre><code>import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n</code></pre> <p>Now you have the sparse representation of all patches in the image (<code>out.f_x</code>) and the reconstructed activations (<code>out.x_hats</code>).</p> <p>You might select the dimensions with maximal values for each patch and see what other images are maximally activating.</p>"},{"location":"users/new-project/","title":"New Project Structure","text":"<p>saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. 
This is a guide to that process.</p> <p>TL;DR:</p> <ol> <li>Fork saev.</li> <li>Clone your fork.</li> <li>Create a new directory in <code>contrib/</code>.</li> <li>Update both <code>src/saev</code> and your new contrib directory as necessary.</li> <li>(Hopefully) publish.</li> <li>If your changes to <code>src/saev</code> are broadly useful and not overly restrictive, open a PR with your changes to <code>src/saev</code>.</li> </ol> <p>I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.</p> <p>First, fork and clone saev. Do this however you want, but GitHub has a guide on it.</p> <p>Second, you probably want to store code related to your project in this repo. Make a new directory in <code>contrib/</code>. I'm calling my new subproject \"birdsong.\"</p> <pre><code>[I] samuelstevens@host ~/p/saev (main)&gt; tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n</code></pre> <p>Use <code>uv</code> to make a new package inside your new project:</p> <pre><code>[I] samuelstevens@host ~/p/s/c/birdsong (main)&gt; uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n</code></pre> <p>Now you have some additional files.</p> <pre><code>[I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)&gt; tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n</code></pre> <p>Now I can write scripts and source code for birdsong-specific stuff in here. 
I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new <code>birdsong/notebooks</code> directory, and will add <code>birdsong/logbook.md</code> to store ongoing TODO items, and so on.</p> <p>To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit <code>src/saev/data/datasets.py</code>.</p> <p>I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in <code>src/saev/data</code>.</p> <p>If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with <code>birdsong</code> because that's specific to me, rather than to the library.<sup>1</sup></p> <ol> <li> <p>Technically, <code>birdsong</code> will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9</p> </li> </ol>"},{"location":"users/sweeps/","title":"Sweeps","text":"<p>Hyperparameter sweeps in <code>saev</code> train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. 
Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.</p>"},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"<p>Create a Python file defining your sweep:</p> <pre><code># sweeps/my_sweep.py\n\ndef make_cfgs() -&gt; list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Run the sweep:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n</code></pre> <p>This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.</p>"},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"<p>SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. 
By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:</p> <pre><code>\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n</code></pre>"},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"<p>Python sweeps give you full control over config generation. 
Your sweep file must define a <code>make_cfgs()</code> function that returns a list of dicts.</p> <p>Grid search example:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Paired parameters (not a grid):</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 162 configs you'd get from a full grid (3 x 3 x 6 x 6).</p> <p>Conditional sweeps:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae &lt;= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"<p>Command-line arguments override sweep parameters with deep merging. 
The precedence order is: CLI &gt; Sweep &gt; Default.</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n</code></pre> <p>Override nested config fields with dotted notation:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n</code></pre> <p>Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.</p>"},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"<p>Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like <code>train_data</code>, <code>n_train</code>, <code>device</code>) must be identical across all configs in a parallel group.</p> <p>When configs differ in these parameters, they're automatically split into separate Slurm jobs:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]</p> <p>Implementation detail</p> <p>See <code>CANNOT_PARALLELIZE</code> in <code>train.py</code> for the full list of parameters that split parallel groups. 
The <code>split_cfgs()</code> function handles grouping automatically.</p>"},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"<p>Your sweep file is executed as a Python module, so you can use imports and helper functions:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Import mechanics</p> <p>The sweep file is loaded with <code>importlib.import_module()</code>, so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a <code>sweeps/</code> subdirectory).</p>"},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"<p>When running with <code>--slurm-acct</code>, each parallel group becomes a separate Slurm job:</p> <pre><code>uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n</code></pre> <p>The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results</p>"},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"<p>Seeds are automatically incremented for each config to ensure reproducibility:</p> <pre><code># Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n</code></pre> <p>Override the base seed on the command line:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py --seed 
100\n</code></pre>"},{"location":"users/sweeps/#examples","title":"Examples","text":"<p>Simple grid:</p> <pre><code># sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n</code></pre> <p>Layer sweep with paired train/val:</p> <pre><code># sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Architecture sweep:</p> <pre><code># sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"<p>saev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.</p>"},{"location":"#installation","title":"Installation","text":"<p>Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.</p> <p>Clone this repository, then from the root directory:</p> <pre><code>uv run scripts/launch.py --help\n</code></pre> <p>This will create a virtual environment and display the help for all the provided framework scripts.</p>"},{"location":"#quick-start","title":"Quick Start","text":"<p>Save some activations to disk:</p> <pre><code>uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n</code></pre> <p>Read the guide for details.</p>"},{"location":"#why-saev","title":"Why saev?","text":"<p>There are plenty of alternative libraries for SAEs:</p> <ul> <li>Overcomplete, primarily developed by Thomas Fel.</li> </ul> <p>However, saev has some benefits:</p> <ol> <li>saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. 
This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.</li> <li>saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.</li> <li>saev includes complete code from preprints in the <code>contrib/</code> directory, along with logbooks describing how the authors used and developed saev.</li> </ol>"},{"location":"api/colors/","title":"saev.colors","text":"<p>Utility color palettes used across saev visualizations.</p>"},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"<code>dict_to_dataclass(data, cls)</code>","text":"<p>Recursively convert a dictionary to a dataclass instance.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -&gt; T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle 
pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n</code></pre>"},{"location":"api/configs/#saev.configs.expand","title":"<code>expand(config)</code>","text":"<p>Expand a nested dict that may contain lists into many dicts.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef expand(config: dict[str, object]) -&gt; Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n</code></pre>"},{"location":"api/configs/#saev.configs.get_non_default_values","title":"<code>get_non_default_values(obj, default_obj)</code>","text":"<p>Recursively find fields that differ from defaults.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -&gt; dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == 
default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n</code></pre>"},{"location":"api/configs/#saev.configs.load_cfgs","title":"<code>load_cfgs(override, *, default, sweep_dcts)</code>","text":"<p>Load a list of configs from a combination of sources.</p> <p>Parameters:</p> Name Type Description Default <code>override</code> <code>T</code> <p>Command-line overridden values.</p> required <code>default</code> <code>T</code> <p>The default values for a config.</p> required <code>sweep_dcts</code> <code>list[dict]</code> <p>A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.</p> required <p>Returns:</p> Type Description <code>tuple[list[T], list[str]]</code> <p>A list of configs and a list of errors.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -&gt; tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. 
Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, errs\n</code></pre>"},{"location":"api/configs/#saev.configs.load_sweep","title":"<code>load_sweep(sweep_fpath)</code>","text":"<p>Load a sweep file and return the list of config dicts.</p> <p>Parameters:</p> Name Type Description Default <code>sweep_fpath</code> <code>Path</code> <p>Path to a Python file with a <code>make_cfgs()</code> function.</p> required <p>Returns:</p> Type 
Description <code>list[dict]</code> <p>List of config dictionaries from <code>make_cfgs()</code>. Returns empty list if any error occurs.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -&gt; list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n</code></pre>"},{"location":"api/disk/","title":"saev.disk","text":"<p>Helpers for sticking with the layout described in disk-layout.md.</p>"},{"location":"api/disk/#saev.disk.Run","title":"<code>Run(run_dir)</code>","text":"<p>Represents an SAE training run and some associated data.</p> <p>Parameters:</p> Name Type Description Default <code>run_dir</code> <code>Path</code> <p>Run directory, should be $SAEV_NFS/saev/runs/. Assumes the run already exists and validates the structure. Use <code>Run.new()</code> to create a new run. 
required Source code in <code>src/saev/disk.py</code> <pre><code>def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) &lt; 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n</code></pre>"},{"location":"api/disk/#saev.disk.Run.ckpt","title":"<code>ckpt</code>  <code>property</code>","text":"<p>Path to the sae.pt checkpoint.</p>"},{"location":"api/disk/#saev.disk.Run.config","title":"<code>config</code>  <code>property</code>","text":"<p>The training run config. 
Not a train.Config object because we don't want to import from train.py.</p>"},{"location":"api/disk/#saev.disk.Run.inference","title":"<code>inference</code>  <code>property</code>","text":"<p>Path to the inference/ directory.</p>"},{"location":"api/disk/#saev.disk.Run.run_id","title":"<code>run_id</code>  <code>property</code>","text":"<p>The run ID, created by wandb.</p>"},{"location":"api/disk/#saev.disk.Run.train_shards","title":"<code>train_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.val_shards","title":"<code>val_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.new","title":"<code>new(run_id, *, train_shards_dir, val_shards_dir, runs_root)</code>  <code>classmethod</code>","text":"<p>Create a new run with directory structure and symlinks.</p> <p>Parameters:</p> Name Type Description Default <code>run_id</code> <code>str</code> <p>The run ID (typically from wandb).</p> required <code>train_shards_dir</code> <code>Path</code> <p>Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/). required <code>val_shards_dir</code> <code>Path</code> <p>Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/). 
required <code>runs_root</code> <code>Path</code> <p>Root directory for runs (typically $SAEV_NFS/saev/runs).</p> required <p>Returns:</p> Type Description <code>Run</code> <p>A new Run instance with all directories and symlinks created.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -&gt; \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n</code></pre>"},{"location":"api/disk/#saev.disk.is_runs_root","title":"<code>is_runs_root(path)</code>","text":"<p>Check if <code>path</code> is a valid runs root directory.</p> <p>A valid runs root ends with <code>saev/runs</code> and exists as a directory.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/runs.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_runs_root(path: 
pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_dir","title":"<code>is_shards_dir(path)</code>","text":"<p>Check if <code>path</code> is a specific shards directory.</p> <p>A valid shards directory ends with <code>saev/shards/&lt;hash&gt;</code> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards/ with required files. Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/&lt;hash&gt;` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/&lt;hash&gt; with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) &lt; 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_root","title":"<code>is_shards_root(path)</code>","text":"<p>Check if <code>path</code> is a valid shards root directory.</p> <p>A valid shards root ends with <code>saev/shards</code> and exists as a directory.</p> <p>Parameters:</p> Name 
Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_root(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n</code></pre>"},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"<code>RemovedFeatureError</code>","text":"<p>               Bases: <code>RuntimeError</code></p> <p>Feature existed before but is no longer supported.</p>"},{"location":"api/helpers/#saev.helpers.batched_idx","title":"<code>batched_idx(total_size, batch_size)</code>","text":"<p>Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.</p> <p>Parameters:</p> Name Type Description Default <code>total_size</code> <code>int</code> <p>total number of examples</p> required <code>batch_size</code> <code>int</code> <p>maximum distance between the generated indices.</p> required <p>Returns:</p> Type Description <p>A generator of (int, int) tuples that can slice up a list or a tensor.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"<code>__iter__()</code>","text":"<p>Yield (start, end) index pairs for batching.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __iter__(self) -&gt; 
Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"<code>__len__()</code>","text":"<p>Return the number of batches.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.progress","title":"<code>progress(it, *, every=10, desc='progress', total=0)</code>","text":"<p>Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.</p> <p>Parameters:</p> Name Type Description Default <code>it</code> <code>Iterable</code> <p>Iterable to wrap.</p> required <code>every</code> <code>int</code> <p>How many iterations between logging progress.</p> <code>10</code> <code>desc</code> <code>str</code> <p>What to name the logger.</p> <code>'progress'</code> <code>total</code> <code>int</code> <p>If non-zero, how long the iterable is.</p> <code>0</code> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n</code></pre>"},{"location":"api/helpers/#saev.helpers.csr_topk","title":"<code>csr_topk(arr, *, k, axis=0, batch_size=1024)</code>","text":"<p>Takes the top k values of a sparse CSR array.</p> <p>We can only iterate efficiently over rows 
because it's a a CSR array.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>csr_array | csr_matrix</code> <p>The CSR array of values with shape (rows, cols).</p> required <code>k</code> <code>int</code> <p>The k in \"top-k\".</p> required <code>axis</code> <code>int</code> <p>The dimension to sort along.</p> <code>0</code> <code>batch_size</code> <code>int</code> <p>How many rows to process at once.</p> <code>1024</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>saev.helpers.NumpyTopK</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -&gt; NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n</code></pre>"},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"<code>current_git_commit()</code>","text":"<p>Best-effort short SHA of the repo containing this file.</p> <p>Returns <code>None</code> when * <code>git</code> executable is missing, * we\u2019re not inside a git repo (e.g. 
installed wheel), * or any git call errors out.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef current_git_commit() -&gt; str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n</code></pre>"},{"location":"api/helpers/#saev.helpers.flattened","title":"<code>flattened(dct, *, sep='.')</code>","text":"<p>Flatten a potentially nested dict to a single-level dict with <code>.</code>-separated keys.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -&gt; dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return 
new\n</code></pre>"},{"location":"api/helpers/#saev.helpers.fssafe","title":"<code>fssafe(s)</code>","text":"<p>Convert a string to be filesystem-safe by replacing special characters.</p> <p>This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.</p> <p>Parameters:</p> Name Type Description Default <code>s</code> <code>str</code> <p>String to make filesystem-safe.</p> required <p>Returns:</p> Type Description <code>str</code> <p>Filesystem-safe version of the string.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef fssafe(s: str) -&gt; str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"&lt;\": \"_\",\n        \"&gt;\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"<code>get_cache_dir()</code>","text":"<p>Get cache directory from environment variables, defaulting to the current working directory (.)</p> <p>Returns:</p> Type Description <code>str</code> <p>A path to a cache 
directory (might not exist yet).</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_cache_dir() -&gt; str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"<code>get_slurm_job_count()</code>","text":"<p>Get the current number of jobs in the queue for the current user.</p> <p>Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_job_count() -&gt; int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"<code>get_slurm_max_array_size()</code>","text":"<p>Get the MaxArraySize configuration from the current Slurm cluster.</p> <p>Returns:</p> Name Type Description <code>int</code> 
<code>int</code> <p>The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_array_size() -&gt; int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. 
Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"<code>get_slurm_max_submit_jobs()</code>","text":"<p>Get the MaxSubmitJobs limit from the current user's QOS.</p> <p>Returns:</p> Name Type Description <code>int</code> <code>int</code> <p>The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_submit_jobs() -&gt; int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n        
    logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.np_topk","title":"<code>np_topk(arr, k, axis=None)</code>","text":"<p>A numpy implementation of torch.topk.</p> <p>Returns the k largest elements along the given axis. If axis is None, the array is flattened first.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>ndarray</code> <p>Input array.</p> required <code>k</code> <code>int</code> <p>Number of top elements to return.</p> required <code>axis</code> <code>int | None</code> <p>Axis along which to find top k elements. If None, flattens array first.</p> <code>None</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>Array of k largest values along the specified axis, sorted in descending order.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -&gt; NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. 
If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis &lt; 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"<code>submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)</code>","text":"<p>Submit jobs in batches to respect Slurm's MaxArraySize limit.</p> <p>Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.</p> <p>Parameters:</p> Name Type Description Default <code>executor</code> <p>A submitit executor (SlurmExecutor or LocalExecutor).</p> required <code>fn</code> <code>Callable</code> <p>Worker function to call for each config.</p> required <code>args_list</code> <code>list</code> <p>List of arguments to pass to fn.</p> required <code>logger</code> <code>Logger | None</code> <p>Optional logger for progress messages.</p> <code>None</code> <code>margin</code> <code>float</code> <p>Fraction of MaxArraySize to use (default 0.8).</p> <code>0.8</code> <p>Yields:</p> Type Description <code>int</code> <p>Tuples of (global_index, result) for successful jobs.</p> <code>object</code> <p>For failed jobs, yields (global_index, None) and logs a warning.</p> Example <pre><code>executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n</code></pre> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -&gt; Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, 
None\n</code></pre>"},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"<code>Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements)</code>  <code>dataclass</code>","text":"<p>Validated reconstruction metrics aggregated over one evaluation corpus.</p> <p>The primary totals are <code>sse_recon</code> (SAE reconstruction SSE) and <code>sse_baseline</code> (mean-baseline SSE). Derived terms are: - <code>normalized_mse = sse_recon / sse_baseline</code> - <code>mse_per_dim = sse_recon / n_elements</code> - <code>mse_per_token = sse_recon / n_tokens</code> - <code>baseline_mse_per_dim = sse_baseline / n_elements</code> - <code>baseline_mse_per_token = sse_baseline / n_tokens</code></p> <p>Size terms are: - <code>n_tokens</code>: number of tokens included in aggregation - <code>d_model</code>: embedding width per token - <code>n_elements = n_tokens * d_model</code></p>"},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"<code>from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model)</code>  <code>classmethod</code>","text":"<p>Construct metrics from aggregate sums and shape information.</p> <p>Parameters:</p> Name Type Description Default <code>sse_recon</code> <code>float</code> <p>Sum of squared reconstruction errors over all selected tokens and dimensions.</p> required <code>sse_baseline</code> <code>float</code> <p>Sum of squared mean-baseline errors over the same tokens and dimensions.</p> required <code>n_tokens</code> <code>int</code> <p>Number of selected tokens in the aggregation set.</p> required <code>d_model</code> <code>int</code> <p>Activation dimension per token.</p> required <p>Returns:</p> Type Description <code>Metrics</code> <p>A validated <code>Metrics</code> object with all derived fields populated.</p> Source code in <code>src/saev/metrics.py</code> 
<pre><code>@classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -&gt; \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens &gt; 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model &gt; 0, msg\n    msg = f\"sse_recon must be &gt;= 0, got {sse_recon}.\"\n    assert sse_recon &gt;= 0.0, msg\n    msg = f\"sse_baseline must be &gt; 0, got {sse_baseline}.\"\n    assert sse_baseline &gt; 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n</code></pre>"},{"location":"api/saev/","title":"saev","text":"<p>saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.</p>"},{"location":"api/summary/","title":"Summary","text":"<ul> <li>saev</li> <li>saev.colors</li> <li>saev.configs</li> <li>saev.data</li> <li>saev.data.bird_mae</li> <li>saev.data.buffers</li> <li>saev.data.clip</li> <li>saev.data.datasets</li> <li>saev.data.dinov2</li> <li>saev.data.dinov3</li> 
<li>saev.data.fake_clip</li> <li>saev.data.indexed</li> <li>saev.data.models</li> <li>saev.data.ordered</li> <li>saev.data.pe</li> <li>saev.data.shards</li> <li>saev.data.shuffled</li> <li>saev.data.siglip</li> <li>saev.data.transforms</li> <li>saev.disk</li> <li>saev.framework</li> <li>saev.framework.inference</li> <li>saev.framework.shards</li> <li>saev.framework.train</li> <li>saev.helpers</li> <li>saev.metrics</li> <li>saev.nn</li> <li>saev.nn.modeling</li> <li>saev.nn.objectives</li> <li>saev.utils</li> <li>saev.utils.monitoring</li> <li>saev.utils.scheduling</li> <li>saev.utils.statistics</li> <li>saev.utils.wandb</li> <li>saev.viz</li> </ul>"},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"<code>load_palette(path)</code>","text":"<p>TODO: docstring.</p> Source code in <code>src/saev/viz.py</code> <pre><code>@beartype.beartype\ndef load_palette(path: pathlib.Path) -&gt; list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                
continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 &lt;= chan &lt;= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n</code></pre>"},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"<code>Encoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Pure PyTorch Bird-MAE backbone (no HF).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, cfg: Config) -&gt; None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. 
Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"<code>PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Image (time x mel) to patch embeddings.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -&gt; None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    
)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"<code>Transformer(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"<code>filter_audio(waveform, sample_rate, patches, *, mode='time')</code>","text":"<p>Filter audio based on SAE patch activations over the log-mel spectrogram.</p> <p>Given a waveform and the SAE 
activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.</p> <p>Parameters:</p> Name Type Description Default <code>waveform</code> <code>Float[Tensor, ' samples']</code> <p>Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.</p> required <code>sample_rate</code> <code>int</code> <p>Audio sample rate in Hz. Should be 32000 for Bird-MAE.</p> required <code>patches</code> <code>Bool[Tensor, ' content_tokens_per_example']</code> <p>Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.</p> required <code>mode</code> <code>Literal['time', 'time+freq']</code> <p>Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.</p> <code>'time'</code> <p>Returns:</p> Type Description <code>Float[Tensor, ' clipped']</code> <p>Filtered audio waveform as a 1D torch tensor.</p> Example <p>waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np)  # [512, 128] waveform = torch.from_numpy(waveform_np)</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -&gt; Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. 
Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        &gt;&gt;&gt; waveform_np, sr = librosa.load(audio_path, sr=32000)\n        &gt;&gt;&gt; mel = bird_mae.transform(waveform_np)  # [512, 128]\n        &gt;&gt;&gt; waveform = torch.from_numpy(waveform_np)\n        &gt;&gt;&gt; # ... run through SAE to get patch_activations [256] ...\n        &gt;&gt;&gt; # ... covert SAE activations to bool with &gt; 0 ...\n        &gt;&gt;&gt; time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        &gt;&gt;&gt; time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() &lt; max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n        
    waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs &gt;= hz_low) &amp; (freqs &lt; hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start &lt; valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n        
    win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start &gt;= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... 
covert SAE activations to bool with &gt; 0 ...","text":"<p>time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")</p>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"<code>transform(waveform)</code>","text":"<p>waveform: 1D tensor [samples] returns: 2D tensor [512, 128] matching HF's feature extractor output</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -&gt; Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples &lt; max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t &lt; BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t &gt; BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, 
BIRDMAE_N_MELS), fb.shape\n\n    return fb\n</code></pre>"},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"<code>ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)</code>","text":"<p>Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -&gt; (x, meta). Random order, each sample delivered once, blocking semantics.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n  
  try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"<code>RingBuffer(slots, shape, dtype)</code>","text":"<p>Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.</p>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"<p>slots  : int           capacity in number of items (tensor rows) shape  : tuple[int]    shape of one item, e.g. 
(batch, dim) dtype  : torch.dtype   tensor dtype</p> <p>put(tensor)  : blocks if full get() -&gt; tensor  : blocks if empty qsize() -&gt; int        advisory size (approximate) close()               frees shared storage (call in the main process)</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots &gt; 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -&gt; Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for 
tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"<code>get()</code>","text":"<p>Return a view of the next item; blocks if the queue is empty.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def get(self) -&gt; torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"<code>put(tensor)</code>","text":"<p>Copy <code>tensor</code> into the next free slot; blocks if the queue is full.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def put(self, tensor: torch.Tensor) -&gt; None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 &lt;&lt; 
64)\n</code></pre>"},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Get patch size for CLIP models.</p>"},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. 
import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"<code>BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).</p> <p>See https://www.kaggle.com/competitions/birdclef-2025/data for more information.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of bird audio samples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"<code>root = pathlib.Path('data/birdclef-2025')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory containing the BirdCLEF 2025 data.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"<code>split = 'train_audio'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which data split to use.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"<code>BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').</p> Source code in 
<code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -&gt; target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        
tp.assert_never(cfg.split)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"<code>n_classes</code>  <code>property</code>","text":"<p>Number of bird species.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"<code>Cifar10(name='uoft-cs/cifar10', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace CIFAR-10.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"<code>name = 'uoft-cs/cifar10'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"<code>root</code>  <code>property</code>","text":"<p>Dummy path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. 
Can be 'train' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"<code>DatasetConfig</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Abstract base class for dataset configurations.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"<code>n_examples</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Number of examples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"<code>root</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"<code>FakeImg(n_examples=10)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"<code>FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Tiny synthetic segmentation dataset for tests.</p> <p>Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which class index is considered background.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"<code>content_tokens_per_example = 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per 
example.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"<code>n_classes = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of segmentation classes.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"<code>n_examples = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of examples.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"<code>FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Synthetic segmentation dataset providing pixel-level segmentation masks.</p> <p>Mimics ImgSegFolderDataset by providing:</p> <ul> <li>image: a dummy RGB PIL image</li> <li>segmentation: a PIL image with pixel-level class labels</li> <li>index, target, label</li> </ul> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"<code>Imagenet(name='ILSVRC/imagenet-1k', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace Imagenet.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. 
Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"<code>name = 'ILSVRC/imagenet-1k'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this..</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"<code>ImgFolder(root=pathlib.Path('./data/split'))</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. 
If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"<code>root = pathlib.Path('./data/split')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored. Can be a glob pattern to match multiple directories.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"<code>ImgFolderDataset(*args, sample_transform=None, **kwargs)</code>","text":"<p>               Bases: <code>ImageFolder</code></p> <p>A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"<code>__getitem__(index)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>index</code> <code>int</code> <p>Index</p> required <p>Returns:</p> Type Description <code>dict[str, object]</code> <p>dict with keys 'data', 'index', 'target' and 'label'.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __getitem__(self, index: int) -&gt; dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if 
self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"<code>ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Background label.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"<code>labels_csv = 'labels.csv'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>CSV file with columns: stem,label1,label2,... First column must be 'stem'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. 
Calculated on the fly by counting image files in root/images/split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"<code>root = pathlib.Path('./data/segdataset')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"<code>split = 'training'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"<code>get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>Gets the dataset for the current experiment; delegates construction to dataset-specific functions.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Config</code> <p>Config for the dataset.</p> required <code>data_tr</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> required <code>mask_tr</code> <p>Transform to be applied to masks.</p> required <code>dict_tr</code> <p>Transform to be applied to the entire sample dict.</p> required <p>Returns:     A dataset that has dictionaries with <code>'data'</code>, <code>'index'</code>, <code>'target'</code>, and <code>'label'</code> keys containing examples.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied 
to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"<code>is_img_seg_dataset(data_cfg)</code>","text":"<p>Check if a dataset 
configuration is for an image segmentation dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data_cfg</code> <code>DatasetConfig</code> <p>Dataset configuration</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if this is an image segmentation dataset that should have labels.bin</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -&gt; bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n</code></pre>"},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"<code>Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None)</code>  <code>dataclass</code>","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"<code>depth = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of transformer blocks.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"<code>device = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Device for tensor operations.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"<code>embed_dim = 768</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Embedding dimension for transformer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"<code>ffn_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in feed-forward network.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"<code>ffn_layer = 'mlp'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Type of feed-forward network layer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"<code>ffn_ratio = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Feed-forward network expansion ratio.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"<code>img_size = 224</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Image width and height in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"<code>in_chans = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of input image channels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"<code>mask_k_bias = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to mask K bias in attention.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"<code>n_storage_tokens = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of storage/register tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"<code>num_heads = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of attention heads.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"<code>patch_size = 16</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of each patch in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"<code>pos_embed_rope_base = 100.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Base frequency for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"<code>pos_embed_rope_dtype = 'bf16'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data type for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"<code>pos_embed_rope_max_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"<code>pos_embed_rope_min_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Minimum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"<code>pos_embed_rope_normalize_coords = 'separate'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Coordinate normalization method for RoPE encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"<code>proj_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in output projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"<code>qkv_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in QKV projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"<code>untie_global_and_local_cls_norm = 
False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use separate norms for global and local CLS tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"<code>PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)</code>","text":"<p>               Bases: <code>Module</code></p> <p>2D image to patch embedding: (B,C,H,W) -&gt; (B,N,D)</p> <p>Parameters:</p> Name Type Description Default <code>img_size</code> <code>int | tuple[int, int]</code> <p>Image size.</p> <code>224</code> <code>patch_size</code> <code>int | tuple[int, int]</code> <p>Patch token size.</p> <code>16</code> <code>in_chans</code> <code>int</code> <p>Number of input image channels.</p> <code>3</code> <code>embed_dim</code> <code>int</code> <p>Number of linear projection output channels.</p> <code>768</code> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -&gt; None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/dinov3.py</code> 
<pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, 
n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n</code></pre>"},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"<p>Fake CLIP model for testing with tiny-open-clip-model.</p> <p>This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Tiny model uses 2x2 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for tiny model (8x8 images).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 
images).\"\"\"\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"<code>Dataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"<code>Transformer</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Protocol defining the interface for all Transformer models.</p>"},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"<code>patch_size</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Patch size in pixels (e.g., 14 or 16).</p>"},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"<code>forward(batch)</code>  <code>abstractmethod</code>","text":"<p>Run forward pass on batch of images.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -&gt; Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"<code>get_residuals()</code>  <code>abstractmethod</code>","text":"<p>Return the list of residual blocks/layers for hook registration.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_residuals(self) -&gt; list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"<code>get_token_i(content_tokens_per_example)</code>  
<code>abstractmethod</code>","text":"<p>Return indices for selecting relevant tokens from activations.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -&gt; slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"<code>make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"<code>make_transforms(ckpt, content_tokens_per_example)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.list_families","title":"<code>list_families()</code>","text":"<p>List all ViT family names.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>def list_families() -&gt; list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n</code></pre>"},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"<code>load_model_cls(family)</code>","text":"<p>Load a transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef load_model_cls(family: str) -&gt; type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n</code></pre>"},{"location":"api/data/models/#saev.data.models.register_family","title":"<code>register_family(cls)</code>","text":"<p>Register a new transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        
logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n</code></pre>"},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"<p>Ordered (sequential) dataloader for activation data.</p> <p>This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.</p> <p>Patch labels are provided if there is a labels.bin file on disk.</p> <p>See the design decisions in src/saev/data/performance.md.</p> Usage <p>cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ...     activations = batch[\"act\"]  # [batch_size, d_model] ...     image_indices = batch[\"example_idx\"]  # [batch_size] ...     patch_indices = batch[\"token_idx\"]  # [batch_size] ...     patch_labels = batch[\"patch_labels\"]  # [batch_size]</p>"},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/pe/","title":"saev.data.pe","text":"<p>Perception Encoder (PE) models from Meta (Bolya et al., 2025).</p> <p>PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Both are available via timm.</p>"},{"location":"api/data/pe/#saev.data.pe.Core","title":"<code>Core(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Core: CLIP-style model for language alignment.</p> <p>Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"<code>Spatial(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) 
- vit_pe_spatial_base_patch16_512.fb (B/16, 512px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"<code>IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"<code>IndexedDataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> <p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels 
were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / 
hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"<code>OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"<code>OrderedDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"<code>ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"<code>ShuffledDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = 
None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n          
  self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"<code>make_ordered_config(shuffled_cfg, **overrides)</code>","text":"<p>Create an <code>OrderedConfig</code> from a <code>ShuffledConfig</code>, with optional overrides.</p> <p>Defaults come from <code>shuffled_cfg</code> for fields present in <code>OrderedConfig</code>, and <code>overrides</code> take precedence. Unknown override fields raise <code>TypeError</code> from the <code>OrderedConfig</code> constructor, mirroring <code>dataclasses.replace</code>.</p> Source code in <code>src/saev/data/__init__.py</code> <pre><code>@beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -&gt; OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n</code></pre>"},{"location":"api/data/shards/","title":"saev.data.shards","text":"<p>Library code for reading and writing sharded activations to disk.</p>"},{"location":"api/data/shards/#saev.data.shards.Index","title":"<code>Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard)</code>  <code>dataclass</code>","text":"<p>Attributes:</p> Name Type Description <code>idx</code> <code>int</code> <p>The index of the activation.</p> <code>example_idx</code> <code>int</code> <p>The index of the original example (image, audio clip etc).</p> <code>content_token_idx</code> 
<code>int</code> <p>The token's index within an example's content. -1 for all special tokens.</p> <code>shard_idx</code> <code>int</code> <p>The shard index.</p> <code>example_idx_in_shard</code> <code>int</code> <p>The example index along the examples axis in a shard.</p> <code>token_idx_in_shard</code> <code>int</code> <p>The token index along the tokens axis in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"<code>IndexMap(md, tokens, layer)</code>","text":"<p>Attributes:</p> Name Type Description <code>md</code> <code>Metadata</code> <p>Metadata</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to load.</p> <code>layer</code> <code>int</code> <p>Which layer to load.</p> <code>layer_idx_lookup</code> <code>dict[int, int]</code> <p>The lookup from a transformer layer to the layer idx in the shard.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n          
  return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"<code>LabelsWriter(shards_dir, md)</code>","text":"<p>LabelsWriter handles writing patch-level segmentation labels to a single binary file.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>The shard directory; $SAEV_SCRATCH/saev/shards/ required <code>md</code> <code>Metadata</code> <p>The Metadata object.</p> required <p>Attributes:</p> Name Type Description <code>labels</code> <code>UInt8[ndarray, 'n_examples n_patches']</code> <p>The integer patch labels.</p> <code>labels_path</code> <code>Path</code> <p>Where the integer patch labels are stored.</p> <code>md</code> <code>Metadata</code> 
<p>The dataset metadata.</p> <code>has_written</code> <code>bool</code> <p>Whether we have written any data to <code>self.labels</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"<code>flush()</code>","text":"<p>Flush the memory-mapped file to disk if anything was written.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def flush(self) -&gt; None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"<code>write_batch(batch_labels, start_idx)</code>","text":"<p>Write a batch of labels to the memory-mapped file.</p> <p>Parameters:</p> Name Type Description Default <code>batch_labels</code> <code>ndarray | Tensor</code> <p>Array of shape (batch_size, content_tokens_per_example) with uint8 dtype</p> required <code>start_idx</code> <code>int</code> <p>Starting index in the global labels array</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n  
  Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size &lt;= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> 
<p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example 
including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"<code>RecordedTransformer(model, content_tokens_per_example, cls_token, layers)</code>","text":"<p>               Bases: <code>Module</code></p> <p>A wrapper around a transformer model that records intermediate layer activations during forward passes.</p> <p>Parameters:</p> Name Type Description Default <code>model</code> <code>Module</code> <p>The transformer model to wrap.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether to record the [CLS] token in addition to content tokens.</p> required <code>layers</code> <code>Sequence[int]</code> <p>Which transformer layers to record activations from.</p> required <p>Attributes:</p> Name Type Description <code>model</code> <code>Module</code> <p>The wrapped transformer model.</p> <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per 
example.</p> <code>cls_token</code> <code>bool</code> <p>Whether the [CLS] token is included in recorded activations.</p> <code>layers</code> <code>Sequence[int]</code> <p>Tuple of layer indices being recorded.</p> <code>token_i</code> <code>slice</code> <p>Token indices to extract from model outputs.</p> <code>logger</code> <p>Logger instance for this recorder.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Shard","title":"<code>Shard(name, n_examples)</code>  <code>dataclass</code>","text":"<p>A single shard entry in shards.json, recording the filename and number of examples.</p> <p>Attributes:</p> Name Type Description <code>name</code> <code>str</code> <p>The filename of the shard (e.g., \"acts000000.bin\").</p> <code>n_examples</code> <code>int</code> <p>Number of examples stored in this shard.</p>"},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"<code>ShardInfo(shards=list())</code>  <code>dataclass</code>","text":"<p>A container for shard metadata as recorded in shards.json.</p> <p>Parameters:</p> Name Type Description Default <code>shards</code> <code>list[Shard]</code> <p>A list of Shard objects.</p> <code>list()</code>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"<code>ShardWriter(shards_root, md)</code>","text":"<p>ShardWriter is a 
stateful object that handles sharded activation writing to disk.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>The $SAEV_SCRATCH/saev/shards path.</p> required <code>md</code> <code>Metadata</code> <p>The Metadata object for these shards.</p> required <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>The  $SAEV_SCRATCH/saev/shards/. <code>shard</code> <code>int</code> <code>acts_path</code> <code>Path</code> <code>acts</code> <code>Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None</code> <code>filled</code> <code>int</code> <code>labels_writer</code> <code>LabelsWriter</code> <p>The LabelsWriter writer.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"<code>__enter__()</code>","text":"<p>Context manager entry.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"<code>__exit__(exc_type, exc_val, exc_tb)</code>","text":"<p>Context manager exit - handle cleanup.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle 
cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"<code>write_batch(activations, start_idx, patch_labels=None)</code>","text":"<p>Write a batch of activations and (optionally) patch labels.</p> <p>Parameters:</p> Name Type Description Default <code>activations</code> <code>Float[Tensor, 'batch n_layers all_patches d_model']</code> <p>Batch of activations to write.</p> required <code>start_idx</code> <code>int</code> <p>Starting index for this batch.</p> required <code>patch_labels</code> <code>UInt8[Tensor, 'batch n_patches'] | None</code> <p>Optional patch labels for segmentation datasets.</p> <code>None</code> Source code in <code>src/saev/data/shards.py</code> <pre><code>def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -&gt; None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx &gt;= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. 
Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit &lt; batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 &lt;= {start_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        assert 0 &lt;= start_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 &lt;= {end_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 &lt;= end_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not 
isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"<code>get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)</code>","text":"<p>Get a dataloader for a default map-style dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data</code> <code>Config</code> <p>Config for the dataset.</p> required <code>batch_size</code> <code>int</code> <p>Batch size.</p> required <code>n_workers</code> <code>int</code> <p>Number of dataloader workers.</p> required <code>data_tr</code> <code>Callable | None</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> <code>None</code> <code>mask_tr</code> <code>Callable | None</code> <p>Transform to be applied to masks.</p> <code>None</code> <code>sample_tr</code> <code>Callable | None</code> <p>Transform to be applied to the entire sample dict.</p> <code>None</code> <p>Returns:</p> Type Description <code>DataLoader</code> <p>A PyTorch Dataloader that yields dictionaries with <code>'data'</code> keys containing data batches, <code>'index'</code> keys containing original dataset indices and <code>'label'</code> keys containing label batches.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -&gt; torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the 
raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers &gt; 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"<code>pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)</code>","text":"<p>Convert pixel-level segmentation to patch-level labels using vectorized operations.</p> <p>Parameters:</p> Name Type Description Default <code>seg</code> <code>Image</code> <p>Pixel-level segmentation mask as PIL Image</p> required <code>n_patches</code> <code>int</code> <p>Total number of patches expected</p> required <code>patch_size</code> <code>int</code> <p>Size of each patch in pixels</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>How to aggregate pixel labels into patch labels</p> <code>MAJORITY</code> <code>bg_label</code> <code>int</code> <p>Background label index</p> <code>0</code> <code>max_classes</code> <code>int</code> <p>Maximum number of classes (for bincount)</p> <code>256</code> <p>Returns:</p> Type Description <code>UInt8[Tensor, ' n_patches']</code> <p>Patch labels as uint8 tensor of shape (n_patches,)</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n   
 seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -&gt; UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -&gt; (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        n_patches, max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        
# Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) &gt; 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"<code>worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>family</code> <code>str</code> <p>Transformer family (dinov2, dinov3, clip, etc).</p> required <code>ckpt</code> <code>str</code> <p>Transformer ckpt (hf-hub:imageomics/bioclip2, etc).</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token.</p> required <code>d_model</code> <code>int</code> <p>Hidden dimension of transformer.</p> required <code>layers</code> <code>list[int]</code> <p>The layers to record activations for.</p> required <code>data</code> <code>Config</code> <p>Config for the particular (image) dataset to load.</p> required <code>batch_size</code> <code>int</code> <p>Batch size for the dataset.</p> required <code>n_workers</code> <code>int</code> <p>Number of workers for loading examples fromm the dataset.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>Maximum number of tokens per disk shard.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>Optional method 
for aggregating segmentation label pixels.</p> <code>MAJORITY</code> <code>shards_root</code> <code>Path</code> <p>Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.</p> required <code>device</code> <code>str</code> <p>Device for doing the computation.</p> required <p>Returns:</p> Type Description <code>Path</code> <p>Path to the shards directory.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -&gt; pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n</code></pre>"},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    
self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            
self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"<code>conv2d_to_tokens(x_bchw, conv)</code>","text":"<p>Conv2d then flatten spatial to L, return (B, L, D).</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -&gt; 
b (h w) d\")\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"<code>resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)</code>","text":"<p>Resize image to (w, h) so that:   - w % p == 0, h % p == 0   - (h/p) * (w/p) == N   - Minimizes change in aspect ratio.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -&gt; Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p &lt;= 0 or n &lt;= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist &lt; best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"<code>unfolded_conv2d(x_bchw, conv)</code>","text":"<p>Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. 
Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -&gt; b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n</code></pre>"},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"<p>Script for dumping SAE inference artifacts in a single pass over the dataset.</p> <p>Default mode writes 5 files:</p> <ol> <li>mean_values.pt</li> <li>sparsity.pt</li> <li>distributions.pt</li> <li>token_acts.npz</li> <li>metrics.json</li> </ol> <p>If save=False, only metrics.json is written.</p> <p>metrics.json is serialized from <code>saev.metrics.Metrics</code>.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"<code>Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for computing image 
activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"<code>data = OrderedConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data configuration</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which accelerator to use.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"<code>force_recompute = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Force recomputation even if files exist.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which token labels to ignore when calculating summarized image activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"<code>mem_gb = 80</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"<code>n_dists = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features to save distributions for.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"<code>n_hours = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in 
hours.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"<code>run = pathlib.Path('./runs/abcdefg')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Path to the sae.pt file.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"<code>save = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to write token_acts/statistics files. If False, only metrics.json is written.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. Empty means to not use Slurm.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/inference/#saev.framework.inference.main","title":"<code>main(cfg, sweep=None)</code>","text":"<p>Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config inference.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> Source code in <code>src/saev/framework/inference.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n     
   sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 
0\n</code></pre>"},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"<p>Submitit entrypoint modules for SAE workflows.</p> <p><code>saev.framework</code> is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.</p>"},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"<p>To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.</p> <p>This script handles that additional complexity.</p> <p>Conceptually, activations are either thought of as</p> <ol> <li>A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset</li> <li>Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.</li> </ol>"},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"<code>Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs')</code>  <code>dataclass</code>","text":"<p>Configuration for calculating and saving ViT activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"<code>batch_size = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size for ViT inference.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"<code>ckpt = 'ViT-L-14/openai'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Specific model 
checkpoint.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"<code>cls_token = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the model has a [CLS] token.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"<code>content_tokens_per_example = 256</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per example (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dimension of the ViT activations (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"<code>data = dataclasses.field(default_factory=(datasets.Imagenet))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which dataset to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which device to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"<code>family = 'clip'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which model family.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"<code>layers = dataclasses.field(default_factory=(lambda: [-2]))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which layers to save. 
By default, the second-to-last layer.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"<code>log_to = './logs'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"<code>max_tokens_per_shard = 2400000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"<code>n_workers = 8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloader workers.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"<code>shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to write shards.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"<code>ssl = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use 
SSL.</p>"},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"<code>cli(cfg)</code>","text":"<p>Save ViT activations for use later on.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Configuration for activations.</p> required Source code in <code>src/saev/framework/shards.py</code> <pre><code>@beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        
job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n</code></pre>"},{"location":"api/framework/train/","title":"saev.framework.train","text":"<p>Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.</p> <p>Checklist for making sure your training doesn't suck:</p> <ul> <li>[ ] Data scaling: scale vectors so their average L2 norm is sqrt(n).</li> <li>[ ] Initialize b_e such that each feature activates 10K * d_model / (n * d_sae) of the time, which means that on average, each example activates 10K features.</li> <li>[x] Initialize b_d to 0.</li> <li>[x] Sweep learning rate and sparsity coefficients.</li> <li>[ ] Decay learning rate to 0 over the last 20% of training.</li> <li>[ ] Warmup sparsity over all of training.</li> <li>[x] Gradient clipping (clip at 1 with clip_grad_norm)</li> <li>[x] Track dead latents through training</li> </ul>"},{"location":"api/framework/train/#saev.framework.train.Config","title":"<code>Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for training a sparse autoencoder on a vision transformer.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Hardware 
device.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"<code>grad_clip = 1.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum gradient norm across all SAE parameters.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"<code>log_every = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How often to log to WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"<code>lr = 0.0004</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Learning rate.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"<code>mem_gb = 128</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in hours.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"<code>n_lr_warmup = 500</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of learning rate warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"<code>n_sparsity_warmup = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of sparsity coefficient warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"<code>n_train = 100000000</code>  <code>class-attribute</code> 
<code>instance-attribute</code>","text":"<p>Number of SAE training samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"<code>n_val = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of SAE evaluation samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"<code>objective = nn.objectives.Matryoshka()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE objective configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"<code>optim = 'adam'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Optimizer for training.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"<code>runs_root = pathlib.Path('$SAEV_NFS/saev/runs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory for runs.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"<code>sae = nn.SparseAutoencoderConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"<code>seed = 42</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. 
Empty means to not use Slurm.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"<code>tags = ()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tags to add to WandB run.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"<code>track = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to track with WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"<code>train_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Training data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"<code>val_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Validation data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"<code>wandb_project = 'saev'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>WandB project name.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"<code>EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold)</code>  <code>dataclass</code>","text":"<p>Results of evaluating a trained SAE on a dataset.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"<code>almost_dead_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for an \"almost dead\" 
neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"<code>dense_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for a dense neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"<code>freqs</code>  <code>instance-attribute</code>","text":"<p>How often each feature fired.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Mean L0 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Mean L1 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"<code>mean_values</code>  <code>instance-attribute</code>","text":"<p>The mean value for each feature when it did fire.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Mean MSE across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"<code>n_almost_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on fewer than <code>almost_dead_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that never fired on any example.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"<code>n_dense</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on more than <code>dense_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"<code>normalized_mse</code>  
<code>instance-attribute</code>","text":"<p>Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"<code>sse_baseline</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the mean baseline.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"<code>sse_sae</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the SAE.</p>"},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"<code>evaluate(cfgs, saes, objectives)</code>","text":"<p>Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.</p> <p>The metrics computed are mean <code>L0</code>/<code>L1</code>/<code>MSE</code> losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of <code>EvalMetrics</code> is returned, one for each SAE.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -&gt; list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  
A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x &gt; 0, \"batch d_sae -&gt; d_sae\", \"sum\"\n            ).cpu()\n            values[i] += 
einops.reduce(fwd.f_x, \"batch d_sae -&gt; d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens &gt; 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline &gt; 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs &lt; almost_dead_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs &gt; dense_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                
dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.main","title":"<code>main(cfg, sweep=None, max_parallel=None)</code>","text":"<p>Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config for training an SAE.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> <code>max_parallel</code> <code>int | None</code> <p>Maximum SAEs to train concurrently within a single worker.</p> <code>None</code> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 
019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs 
done.\")\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"<code>split_cfgs(cfgs)</code>","text":"<p>Splits configs into groups that can be parallelized.</p> <p>Parameters:</p> Name Type Description Default <code>cfgs</code> <code>list[Config]</code> <p>A list of configs from a sweep file.</p> required <p>Returns:</p> Type Description <code>list[list[Config]]</code> <p>A list of lists, where the configs in each sublist do not differ in any keys that are in <code>CANNOT_PARALLELIZE</code>. This means that each sublist is a valid \"parallel\" set of configs for <code>train</code>.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -&gt; list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. 
This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.train","title":"<code>train(cfgs)</code>","text":"<p>Explicitly declare the optimizer, schedulers, dataloader, etc outside of <code>main</code> so that all the variables are dropped from scope and can be garbage collected.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -&gt; tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n 
   )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            
tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                
metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch &gt; 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse &gt; 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() &gt; 1e-12).sum(0) == 
0).float().mean()\n\n                    # Dictionary coherence: max |&lt;w_i, w_j&gt;| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        \"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, 
scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n</code></pre>"},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"<p>Neural network architectures for sparse autoencoders.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"<code>AuxK(key='auxk', k_aux=512, alpha=1 / 32)</code>  <code>dataclass</code>","text":"<p>AuxK auxiliary reconstruction loss for dead latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"<code>BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero per sample in the batch.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"<code>BatchTopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>BatchTopK activation and inference-time threshold for sparse autoencoders.</p> <p>This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.</p> <p>Training mode (model.train()):     Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. 
This enforces an average of exactly <code>top_k</code> active features per example while allowing the \"activation budget\" to move between examples in the batch.</p> <pre><code>During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n</code></pre> <p>Eval mode (model.eval()):     At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:</p> <pre><code>    y = x if x &gt; theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n</code></pre> Inputs <p>x: Tensor of shape [batch, d_sae] containing pre-activation codes.</p> Outputs <p>Tensor of shape [batch, d_sae] with the same dtype and device as x, where either:     - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or     - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to each sample in the batch.</p> Source code in 
<code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold &lt;= 0:\n            return torch.where(x &gt; 0, x, torch.zeros_like(x))\n\n        return torch.where(x &gt; self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x &gt; 0]\n        if pos.numel() &gt;= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"<code>NoAux(key='no-aux')</code>  <code>dataclass</code>","text":"<p>No auxiliary loss (e.g., for ReLU).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"<code>NoSparsity(key='no-sparsity')</code>  <code>dataclass</code>","text":"<p>No explicit sparsity penalty (e.g. 
for TopK/BatchTopK where k controls sparsity).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"<code>Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux())</code>  <code>dataclass</code>","text":"<p>Vanilla ReLU</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. 
That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    
device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model 
-&gt; d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"<code>TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"<code>TopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Top-K activation function. 
For use as activation function of sparse encoder.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to the input tensor.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": 
SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"<code>Loss()</code>  <code>dataclass</code>","text":"<p>The loss term for an autoencoder training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"<code>Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000)</code>  <code>dataclass</code>","text":"<p>Config for the Matryoshka loss for another arbitrary SAE class.</p> <p>Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"<code>dead_threshold_tokens = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tokens without activation before a latent is considered 
dead.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"<code>n_prefixes = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of random length prefixes to use for loss calculation.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"<code>MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>Loss</code></p> <p>The composite loss terms for an training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"<code>aux</code>  <code>instance-attribute</code>","text":"<p>Auxiliary loss term (e.g., AuxK).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Sum of L0 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Sum of L1 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Average of reconstruction loss (mean squared error) for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of dead latents (per aux loss threshold).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"<code>sparsity</code>  <code>instance-attribute</code>","text":"<p>Sparsity loss, typically lambda * 
L1.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"<code>MatryoshkaObjective(cfg)</code>","text":"<p>               Bases: <code>Objective</code></p> <p>Torch module for calculating the matryoshka loss for an SAE.</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n</code></pre>"},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"<code>sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)</code>","text":"<p>Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)</p> <p>Parameters:</p> Name Type Description Default <code>d_sae</code> <code>int</code> <p>Total number of latent dimensions</p> required <code>n_prefixes</code> <code>int</code> <p>Number of prefixes to sample</p> required <code>min_prefix_length</code> <code>int</code> <p>Minimum length of any prefix</p> <code>1</code> <code>pareto_power</code> <code>float</code> <p>Power parameter for Pareto distribution (lower = more uniform)</p> <code>0.5</code> <p>Returns:</p> Type Description <code>Int64[Tensor, ' n_prefixes']</code> <p>torch.Tensor: Sorted prefix lengths</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>@torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -&gt; Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. 
Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes &lt;= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes &lt;= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n</code></pre>"},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = 
torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths 
for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -&gt; d_sae\",\n    )\n\n    
norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with 
custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"<code>DataloaderMonitor(dataloader, process_factory=None)</code>","text":"<p>Tracks IO and CPU activity for the dataloader manager process and its children.</p> <p>The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call <code>compute()</code> whenever metrics are needed.</p> Source code in <code>src/saev/utils/monitoring.py</code> <pre><code>def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -&gt; None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n</code></pre>"},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"<code>BatchLimiter(dataloader, 
n_samples)</code>","text":"<p>Limits the number of batches to only return <code>n_samples</code> total samples.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"<code>__getattr__(name)</code>","text":"<p>Pass through attribute access to the wrapped dataloader.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __getattr__(self, name: str) -&gt; Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"<code>Warmup(init, final, n_steps)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly increases from <code>init</code> to <code>final</code> over <code>n_steps</code> steps.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"<code>WarmupCosine(init, n_warmup, peak, n_steps, final)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly 
increases from <code>init</code> to <code>peak</code> over <code>n_warmup</code> steps, then decrease down to final using cosine decay over n_steps - n_warmup.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"<code>PercentileEstimator(percentile, total, lr=0.001, shape=())</code>","text":"Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"<code>update(x)</code>","text":"<p>Update the estimator with a new value.</p> <p>This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>float | Tensor</code> <p>The new value to incorporate into the estimation</p> required Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. 
When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"<code>calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)</code>","text":"<p>Compute entropy and coverage metrics for a batch of shuffled indices.</p> <p>The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.</p> Source code in <code>src/saev/utils/statistics.py</code> <pre><code>@beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -&gt; dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples &lt;= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example &lt;= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n  
      raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n</code></pre>"},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"<code>ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')</code>","text":"<p>Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387</p> Source code in <code>src/saev/utils/wandb.py</code> <pre><code>def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project, config=cfg, mode=mode, tags=tags, dir=dir\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n</code></pre>"},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"<pre><code>docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       
# Other markdown pages, images and other files.\n</code></pre>"},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"<p>Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.</p> <p>Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:</p> <ol> <li>Select \\(n\\) random data points from your training data.</li> <li>Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).</li> <li>Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.</li> <li>Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.</li> <li>Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).</li> </ol> <p>Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).</p>"},{"location":"developers/disk-layout/","title":"Storage &amp; Run Manifest Spec (v1)","text":"<p>There are two main locations:</p> <ol> <li><code>$SAEV_SCRATCH/saev/shards</code>: where we store transformer activations (referred to as <code>shards_root</code> in the codebase).</li> <li><code>$SAEV_NFS/saev/runs</code>: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. 
(referred to as <code>runs_root</code> in the codebase).</li> </ol> <p>Visually, these are:</p> <pre><code>$SAEV_SCRATCH/saev/\n  shards/\n    &lt;shard_hash&gt;/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n</code></pre> <p>and</p> <pre><code>$SAEV_NFS/saev/\n  runs/\n    &lt;run_id&gt;/\n      checkpoint/           # output of train.py on &lt;shard_hash&gt;\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        &lt;shard_hash&gt;/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n</code></pre> <p>Each <code>$SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;/</code> MUST include:</p> <ul> <li><code>metadata.json</code> (UTF-8, canonical spec; see <code>protocol.md</code>)</li> <li><code>shards.json</code> (UTF-8, shard index and sizes; see <code>protocol.md</code>)</li> <li><code>acts*.bin</code> (binary shards; format in <code>protocol.md</code>)</li> <li><code>labels.bin</code> (binary patch labels aligned to shards; format in <code>protocol.md</code>)</li> </ul> <p>Note</p> <p>Immutability: Files under <code>saev/shards/&lt;shard_hash&gt;/</code> MUST be treated as read-only after publication. Any change yields a new <code>shard_hash</code>.</p> <p>All CLI entrypoints should accept a single <code>--run &lt;path&gt;</code> argument. 
Every other path MUST be resolved from the run root:</p> <ul> <li>ViT activations: <code>links/shards</code> \u2192 <code>saev/shards/&lt;shard_hash&gt;</code></li> <li>Dataset: <code>links/dataset</code> \u2192 Dataset root, wherever it is on disk.</li> <li>SAE checkpoint: <code>checkpoint/sae.pt</code></li> </ul> <p>Example resolution:</p> <pre><code>run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n</code></pre> <ul> <li><code>$SAEV_SCRATCH</code> and <code>$SAEV_NFS</code> should be set for all users/processes running saev tools.</li> </ul>"},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":"<ul> <li> <p>Where do patch labels live? Next to <code>acts*.bin</code> in <code>$SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;/labels.bin</code>. Scripts discover them via <code>links/shards/labels.bin</code>.</p> </li> <li> <p>Can I put datasets directly in <code>$SAEV_SCRATCH</code>? Sure, but not in <code>$SAEV_SCRATCH/saev/shards</code>.</p> </li> </ul>"},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"<p>saev caches activations to disk rather than run ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). <code>saev.data</code> has a specific protocol to support this on OSC, a supercomputer center, and to take advantage of OSC's specific disk performance. 
</p> <p>Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:</p> <ul> <li>mem-mappable</li> <li>Parameterized solely by the experiment configuration (<code>scripts/shards.py:Config</code>)</li> <li>Referenced by a content-hash, so identical configs collide, divergent ones never do</li> <li>Can be read quickly in a random order for training, and can be read (slowly) with random-access for visuals.</li> </ul> <p>This document is the single normative source. Any divergence in code is a bug.</p>"},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"<pre><code>&lt;dump_to&gt;/&lt;HASH&gt;/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n</code></pre> <p><code>HASH</code> = <code>sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8'))</code> Guards against silent config drift.</p>"},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. <code>metadata.json</code>","text":"field type semantic <code>family</code> string <code>\"clip\" \\| \"siglip\" \\| \"dinov2\"</code> <code>ckpt</code> string model identifier (OpenCLIP, HF, etc.) 
<code>layers</code> int[] ViT residual\u2010block indices recorded <code>patches_per_ex</code> int example patches only (excludes CLS) <code>cls_token</code> bool <code>true</code> -&gt; patch 0 is CLS, else no CLS <code>d_model</code> int activation dimensionality <code>n_examples</code> int total examples in dataset <code>patches_per_shard</code> int logical activations per shard (see #3) <code>data</code> object opaque dataset description <code>dataset</code> string absolute path to original dataset root <code>dtype</code> string numpy dtype. Fixed <code>\"float32\"</code> for now. <code>protocol</code> string <code>\"2.1\"</code> (shards after big refactor) <p>The <code>data</code> object is <code>base64.b64encode(pickle.dumps(img_ds)).decode('utf8')</code>.</p> <p>The <code>dataset</code> field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.</p>"},{"location":"developers/protocol/#22-shardsjson","title":"2.2. <code>shards.json</code>","text":"<p>A single array of <code>shard</code> objects, each of which has the following fields:</p> field type semantic name string shard filename (<code>acts000000.bin</code>). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. 
Shard sizing maths","text":"<pre><code>tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard, len(layers), tokens_per_ex, d_model,\n)\n</code></pre> <p><code>patches_per_shard</code> is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ <code>d_model = 1024</code>.</p> <p>The last shard will have a smaller value for <code>examples_per_shard</code>; this value is documented in <code>n_examples</code> in <code>shards.json</code></p>"},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"<p>The entire dataset of activations is treated as a single logical 4D tensor with the shape <code>(n_examples, len(layers), tokens_per_ex, d_model)</code>. This logical tensor is C-contiguous with axes ordered <code>[Example, Layer, Token, Dimension]</code>.</p> <p>Physically, this tensor is split along the first axis (<code>Example</code>) into multiple shards, where each shard is a single binary file. 
The number of examples in each shard is constant, except for the final shard, which may be smaller.</p> <p>To locate an arbitrary activation vector, a reader must convert a logical coordinate (<code>global_ex_idx</code>, <code>layer</code>, <code>token_idx</code>) into a file path and an offset within that file.</p>"},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"<p>Let the parameters from <code>metadata.json</code> be:</p> <ul> <li>L = <code>len(layers)</code></li> <li>P = <code>patches_per_ex</code></li> <li>T = <code>P + (1 if cls_token else 0)</code> (Total tokens per example)</li> <li>D = <code>d_model</code></li> <li>S = <code>examples_per_shard</code> from Section 3 (shard sizing); this equals <code>n_examples</code> in <code>shards.json</code> for every shard except possibly the final one, which may be smaller.</li> </ul>"},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"<p>Given a logical coordinate:</p> <ul> <li><code>global_ex_idx</code>: integer, with <code>0 &lt;= global_ex_idx &lt; n_examples</code></li> <li><code>layer</code>: integer, must be an element of <code>layers</code></li> <li><code>token_idx</code>: integer, <code>0 &lt;= token_idx &lt; T</code></li> </ul> <p>The physical location is found as follows:</p> <ol> <li> <p>Identify Shard:</p> <ul> <li><code>shard_idx = global_ex_idx // S</code></li> <li><code>ex_in_shard = global_ex_idx % S</code> The target file is <code>acts{shard_idx:06d}.bin</code>.</li> </ul> </li> <li> <p>Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical <code>layer</code> must be mapped to its index in the stored <code>layers</code> array.</p> <ul> <li><code>layer_idx = layers.index(layer)</code> A reader must raise an error if <code>layer</code> is not in <code>layers</code>.</li> </ul> </li> <li> <p>Calculate Offset: The data within a shard is a 4D tensor of shape <code>(S, L, T, D)</code>. 
The offset to the first byte of the desired activation vector <code>[ex_in_shard, layer_idx , token_idx]</code> is:</p> <ul> <li><code>offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx</code></li> <li><code>offset_in_bytes = offset_in_vectors * D * 4</code> (assuming 4 bytes for <code>float32</code>)</li> </ul> </li> </ol> <p>A reader can then seek to <code>offset_in_bytes</code> and read \\(D \\times 4\\) bytes to retrieve the vector.</p> <p>Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.</p>"},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"<p>The <code>token</code> axis of length \\(T\\) is ordered as follows: * If <code>cls_token</code> is <code>true</code>:     * Index <code>0</code>: [CLS] token activation     * Indices <code>1</code> to \\(P\\): Patch token activations * If <code>cls_token</code> is <code>false</code>:     * Indices <code>0</code> to \\(P-1\\): Patch token activations</p> <p>The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.</p>"},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning &amp; compatibility","text":"<ul> <li>Major changes (shape reorder, dtype switch, new required JSON keys) increment the major protocol version number at the top of this document and must emit a breaking warning in loader code.</li> <li>Minor, backward-compatible additions (new optional JSON key) merely update this doc and the minor protocol version number.</li> </ul> <p>That's it. 
Anything else you find in code that contradicts this document, fix the code or update the spec.</p>"},{"location":"developers/workflows/","title":"Workflows","text":"<ol> <li>Generate inference activations (and thus visuals) for both training and validation splits.</li> </ol>"},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"<p>This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (<code>block.norm2</code>) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.</p>"},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"<p>While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.</p>"},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"<p>First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.</p> <p></p> <p>I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.</p> <p></p>"},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"<p>Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. 
Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.</p> <p></p> <p>My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:</p> <pre><code>bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n</code></pre> <p>Something is broken inside of BirdMAE.</p>"},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"<p>Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.</p> <p></p> <p>BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.</p> <p></p> <p>Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.</p>"},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"<p>Below is the output from the attention layers (Graph #2) in our architecture diagram.</p> <p></p> <p>Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.</p> <p></p> <p>Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. 
Why?</p> <p>Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.</p> <p></p> <p><code>fc2</code> has a <code>weight</code> parameter with shape (4096, 1024) and a <code>bias</code> parameter with shape (1024,). I take the L2 norm of <code>fc2.weight</code>'s columns to see if col 296/1024 is different.</p> <p></p> <p><code>fc2.weight</code> does appear to be different, and abnormally large (note the log scale). <code>fc2.bias</code> is also different, but it's not immediately obvious what's going on there to me.</p>"},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"<p>This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP <code>LayerNorm</code>s learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.</p> <p>We verified this by inspecting <code>norm2.weight</code> across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.</p> <p>The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.</p>"},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"<p>The fix is to record activations after <code>block.norm2</code> (the pre-MLP LayerNorm) instead of from the raw residual stream. 
In <code>saev</code>, this is implemented as:</p> <pre><code>def get_residuals(self) -&gt; list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n</code></pre> <p>After this change, the outlier is suppressed and SAE training works normally.</p>"},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"<ol> <li>Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.</li> <li>Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.</li> <li>Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.</li> </ol>"},{"location":"users/glossary/","title":"Glossary","text":"<p>Definitions for words used in the code and documentation.</p> <ul> <li>example: one dataset item (image, sentence, audio clip, point cloud, graph instance).</li> <li>token: one model position in the encoder\u2019s residual stream (the thing with hidden size <code>d_model</code>). Always \"token\" inside the model.</li> <li>content token: tokens derived from the raw input (image patches, wordpieces, audio windows, nodes, etc.).</li> <li>special token: tokens not directly derived from the raw input (class/summary token, [SEP], [MASK], [PAD], register tokens, etc.).</li> <li>sequence length L: total tokens per example (content + special). If variable, call it \u201cragged\u201d.</li> <li>layer: an integer index into the encoder\u2019s stack.</li> <li>activation kind (optional but useful): which stream you saved (e.g., resid_pre, resid_post, mlp_out, attn_out, qkv, head_out).</li> </ul> <p>Modality-specific vocab:</p> <ul> <li>patch (vision): a 2D content token. 
Often laid out on a grid with shape (H_patches, W_patches).</li> <li>frame/token or tube (video): content token in time \u00d7 space; often (T, H, W).</li> <li>wordpiece / subword (text): content token from a tokenizer.</li> <li>window / frame (audio): time\u2013frequency window.</li> <li>node (graph), point (point cloud).</li> </ul>"},{"location":"users/guide/","title":"Guide","text":"<p>This guide explains how to transition from the ADE20K demo to using <code>saev</code> with your own custom datasets.</p> <p>Here are the steps:</p> <ol> <li>Save ViT activations to disk</li> <li>Train SAEs on activations</li> <li>Evaluate the SAE checkpoints</li> <li>Visualize Learned Features</li> </ol> <p>Note</p> <p><code>saev</code> assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with <code>CUDA_VISIBLE_DEVICES=X</code> to run on GPU X.</p>"},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"<p>To save activations to disk, we need to specify:</p> <ol> <li>Which model we would like to use</li> <li>Which layers we would like to save.</li> <li>Where on disk and how we would like to save activations.</li> <li>Which images we want to save activations for.</li> </ol> <p>The <code>saev/framework/shards.py</code> script does all of this for us.</p> <p>Run <code>uv run launch.py shards --help</code> to see all the configuration.</p> <p>In practice, you might run:</p> <pre><code>uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n</code></pre> <p>This will save activations for the CLIP-pretrained model ViT-B/16, 
which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory <code>/fs/scratch/PAS2136/samuelstevens/saev/shards</code>.</p> <p>Note</p> <p>A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L has 1024 dimensions, and 14x14-pixel patches give 16 x 16 = 256 patches/layer/image), recorded activations will grow even larger.</p> <p>This script will also save a <code>metadata.json</code> file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in <code>.bin</code> files, numbered starting from 000000.</p> <p>To add your own models, see the guide to extending in <code>saev.activations</code>.</p>"},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"<p>To train an SAE, we need to specify:</p> <ol> <li>Which activations to use as input.</li> <li>SAE architectural stuff.</li> <li>Optimization-related stuff.</li> </ol> <p>The <code>train.py</code> script handles this.</p> <p>Run <code>uv run train.py --help</code> to see all the configuration.</p> <p>The most important options are:</p> <ul> <li><code>--runs-root</code>: where to store runs.</li> <li><code>--train-data</code> and <code>--val-data</code>: How to load the training and validation data. 
You probably want to specify both <code>--{train,val}-data.shards</code> (the shard directory) and <code>--{train,val}-data.layer</code> (which layer to use).</li> <li><code>sae.activation</code>: <code>sae.activation:relu</code> to use the ReLU activation.</li> </ul> <p>This is a full example:</p> <pre><code>uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n</code></pre> <p>This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.</p>"},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"<p>The training loop logs additional loader diagnostics derived from <code>calc_batch_entropy</code> in <code>train.py</code>. Every batch contributes two entropy measurements in natural log units:</p> <ul> <li><code>loader/example_entropy</code> and <code>loader/example_entropy_normalized</code> summarize how evenly the shuffled loader samples example indices. 
Normalization divides the raw entropy by <code>ln(metadata.n_examples)</code> so perfectly uniform sampling is 1.0.</li> <li><code>loader/token_entropy</code> and <code>loader/token_entropy_normalized</code> do the same for patch indices using <code>ln(metadata.content_tokens_per_example)</code> as the normalizer.</li> <li><code>loader/example_coverage</code> and <code>loader/token_coverage</code> report the fraction of distinct example or patch indices seen in the current batch relative to their theoretical support.</li> </ul> <p>All of these metrics appear alongside the existing <code>loader/read_mb</code> counters, helping spot skewed sampling or under-covered patches mid-run.</p>"},{"location":"users/guide/#evaluation","title":"Evaluation","text":"<p>After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch <code>torch.nn.Module</code> in combination with a <code>saev.data.OrderedDataLoader</code> or <code>saev.data.IndexedDataset</code>.</p> <p>However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The <code>saev/framework/inference.py</code> script calculates these metrics. You can run <code>uv run launch.py inference --help</code> to see all the options.</p> <p>The most important options are:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--data</code>: The options for the OrderedDataLoader. Specifically, you need to set <code>--data.shards</code> and <code>--data.layer</code>, just like for training.</li> </ul> <pre><code>uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n</code></pre>"},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"<p>Now that you've trained an SAE, you probably want to look at its learned features. 
One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse <code>token_acts.npz</code> file from the previous inference step.</p> <p>Warning</p> <p>Because there are so many different ways to visualize SAE features, I moved the visualization code to <code>contrib/trait_discovery</code> (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").</p> <p>The most important options:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--shards</code>: The shards directory.</li> <li><code>--latents</code>: The 0-indexed latents to save images for.</li> <li><code>--n-latents</code>: The number of randomly selected latents to save images for.</li> </ul> <p>So first, move into the <code>contrib/trait_discovery</code> directory:</p> <pre><code>cd contrib/trait_discovery\n</code></pre> <p>Then run the script that generates highlighted images:</p> <pre><code>uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n</code></pre> <p>Note</p> <p>Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (<code>--include-latents</code>). 
I recommend using <code>saev/interactive/metrics.py</code> with marimo to figure out good thresholds.</p>"},{"location":"users/guide/#sweeps","title":"Sweeps","text":"<p>tl;dr: basically the slow part of training SAEs is loading vit activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.</p>"},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"<p>SAE training optimizes for a unique bottleneck compared to typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.</p> <p>A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.</p>"},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"<p>To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:</p> <pre>\nflowchart TD\n    A[Pre-computed ViT Activations] --&gt;|Slow I/O| B[Memory Buffer]\n    B --&gt;|Shared Batch| C[SAE Model 1]\n    B --&gt;|Shared Batch| D[SAE Model 2]\n    B --&gt;|Shared Batch| E[SAE Model 3]\n    B --&gt;|Shared Batch| F[...]\n</pre> <p>This approach:</p> <ul> <li>Loads each batch of activations once from disk</li> <li>Uses that same batch for multiple SAE models with different hyperparameters</li> <li>Amortizes the slow I/O cost across all models in the sweep</li> </ul>"},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"<p>The <code>train</code> command accepts a <code>--sweep</code> parameter that points 
to a TOML file defining the hyperparameter grid:</p> <pre><code>uv run python -m saev train --sweep configs/my_sweep.toml\n</code></pre> <p>Here's an example sweep configuration file:</p> <pre><code>[sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n</code></pre> <p>This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.</p>"},{"location":"users/guide/#limitations","title":"Limitations","text":"<p>Not all parameters can be swept in parallel. Parameters that affect data loading (like <code>batch_size</code> or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.</p>"},{"location":"users/inference/","title":"Inference","text":"<p>If you want to get started quickly, try the inference notebook in marimo or on Google Colab.</p> <p>Briefly, you need to:</p> <ol> <li>Download a checkpoint.</li> <li>Get the code.</li> <li>Load the checkpoint.</li> <li>Get activations.</li> </ol> <p>Details are below.</p>"},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"<p>First, download an SAE checkpoint from the Huggingface collection.</p>"},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"<p>Some repos (CLIP, BioCLIP, DINOv2) contain a single <code>sae.pt</code> at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.</p> <p>You can use <code>wget</code> if you want:</p> <pre><code>wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n</code></pre>"},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"<p>The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. 
Each repo has a <code>manifest.jsonl</code> with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.</p> <p>Download a specific checkpoint:</p> <pre><code>from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n</code></pre> <p>Download all checkpoints in a repo:</p> <pre><code>from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n</code></pre> <p>Available DINOv3 repos:</p> <ul> <li>osunlp/SAE_DINOv3_ViT-S-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-B-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-L-16_IN1K (layers 13-23)</li> <li>osunlp/SAE_DINOv3_TopK_ViT-L-16_IN1K (layers 13-23)</li> </ul>"},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"<p>The easiest way to do this is to clone the code:</p> <pre><code>git clone https://github.com/Imageomics/saev\n</code></pre> <p>You can also install the package from git if you use uv (not sure about pip or cuda):</p> <pre><code>uv add git+https://github.com/Imageomics/saev\n</code></pre> <p>Or clone it and install it as an editable package with pip, like <code>pip install -e .</code> in your virtual environment.</p> <p>Then you can do things like <code>from saev import ...</code>.</p> <p>Note</p> <p>If you struggle to get <code>saev</code> installed, open an issue on GitHub and I will figure out how to make it easier.</p>"},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"<pre><code>import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n</code></pre> <p>Now you have a pretrained SAE.</p>"},{"location":"users/inference/#get-activations","title":"Get Activations","text":"<p>This is the hardest part. 
We need to:</p> <ol> <li>Pass an image into a ViT</li> <li>Record the dense ViT activations at the same layer that the SAE was trained on.</li> <li>Pass the activations into the SAE to get sparse activations.</li> <li>Do something interesting with the sparse SAE activations.</li> </ol> <p>There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the <code>get_sae_latents()</code> functions in both files.</p> <p>Below is example code to do it using the <code>saev</code> package.</p> <pre><code>import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n</code></pre> <p>Now you have the sparse representation of all patches in the image (<code>out.f_x</code>) and the reconstructed activations (<code>out.x_hats</code>).</p> <p>You might select the dimensions with maximal values for each patch and see what other images are maximally activating.</p>"},{"location":"users/new-project/","title":"New Project Structure","text":"<p>saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. 
This is a guide to that process.</p> <p>TL;DR:</p> <ol> <li>Fork saev.</li> <li>Clone your fork.</li> <li>Create a new directory in <code>contrib/</code>.</li> <li>Update both <code>src/saev</code> and your new contrib directory as necessary.</li> <li>(Hopefully) publish.</li> <li>If your changes to <code>src/saev</code> are broadly useful and not overly restrictive, open a PR with your changes to <code>src/saev</code>.</li> </ol> <p>I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.</p> <p>First, fork and clone saev. Do this however you want, but GitHub has a guide on it.</p> <p>Second, you probably want to store code related to your project in this repo. Make a new directory in <code>contrib/</code>. I'm calling my new subproject \"birdsong.\"</p> <pre><code>[I] samuelstevens@host ~/p/saev (main)&gt; tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n</code></pre> <p>Use <code>uv</code> to make a new package inside your new project:</p> <pre><code>[I] samuelstevens@host ~/p/s/c/birdsong (main)&gt; uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n</code></pre> <p>Now you have some additional files.</p> <pre><code>[I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)&gt; tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n</code></pre> <p>Now I can write scripts and source code for birdsong-specific stuff in here. 
I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new <code>birdsong/notebooks</code> directory, and will add <code>birdsong/logbook.md</code> to store ongoing TODO items, and so on.</p> <p>To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit <code>src/saev/data/datasets.py</code>.</p> <p>I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in <code>src/saev/data</code>.</p> <p>If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with <code>birdsong</code> because that's specific to me, rather than to the library.<sup>1</sup></p> <ol> <li> <p>Technically, <code>birdsong</code> will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9</p> </li> </ol>"},{"location":"users/sweeps/","title":"Sweeps","text":"<p>Hyperparameter sweeps in <code>saev</code> train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. 
Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.</p>"},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"<p>Create a Python file defining your sweep:</p> <pre><code># sweeps/my_sweep.py\n\ndef make_cfgs() -&gt; list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Run the sweep:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n</code></pre> <p>This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.</p>"},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"<p>SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. 
By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:</p> <pre><code>\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n</code></pre>"},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"<p>Python sweeps give you full control over config generation. 
Your sweep file must define a <code>make_cfgs()</code> function that returns a list of dicts.</p> <p>Grid search example:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Paired parameters (not a grid):</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 162 configs you'd get from a full grid (3 x 3 x 6 x 6).</p> <p>Conditional sweeps:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae &lt;= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"<p>Command-line arguments override sweep parameters with deep merging. 
The precedence order is: CLI &gt; Sweep &gt; Default.</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n</code></pre> <p>Override nested config fields with dotted notation:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n</code></pre> <p>Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.</p>"},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"<p>Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like <code>train_data</code>, <code>n_train</code>, <code>device</code>) must be identical across all configs in a parallel group.</p> <p>When configs differ in these parameters, they're automatically split into separate Slurm jobs:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]</p> <p>Implementation detail</p> <p>See <code>CANNOT_PARALLELIZE</code> in <code>train.py</code> for the full list of parameters that split parallel groups. 
The <code>split_cfgs()</code> function handles grouping automatically.</p>"},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"<p>Your sweep file is executed as a Python module, so you can use imports and helper functions:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Import mechanics</p> <p>The sweep file is loaded with <code>importlib.import_module()</code>, so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a <code>sweeps/</code> subdirectory).</p>"},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"<p>When running with <code>--slurm-acct</code>, each parallel group becomes a separate Slurm job:</p> <pre><code>uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n</code></pre> <p>The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results</p>"},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"<p>Seeds are automatically incremented for each config to ensure reproducibility:</p> <pre><code># Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n</code></pre> <p>Override the base seed on the command line:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py --seed 
100\n</code></pre>"},{"location":"users/sweeps/#examples","title":"Examples","text":"<p>Simple grid:</p> <pre><code># sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n</code></pre> <p>Layer sweep with paired train/val:</p> <pre><code># sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Architecture sweep:</p> <pre><code># sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"}]}
\ No newline at end of file
diff --git a/docs/api/sitemap.xml b/docs/api/sitemap.xml
index 178dc5c..d9f457c 100644
--- a/docs/api/sitemap.xml
+++ b/docs/api/sitemap.xml
@@ -1,199 +1,199 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/</loc>
+         <loc>https://imageomics.github.io/saev/api/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/colors/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/colors/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/configs/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/configs/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/disk/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/disk/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/helpers/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/helpers/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/metrics/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/metrics/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/saev/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/saev/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/summary/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/summary/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/viz/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/viz/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/bird_mae/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/bird_mae/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/buffers/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/buffers/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/clip/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/clip/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/datasets/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/datasets/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/dinov2/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/dinov2/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/dinov3/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/dinov3/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/fake_clip/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/fake_clip/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/indexed/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/indexed/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/models/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/models/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/ordered/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/ordered/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/pe/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/pe/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/saev.data/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/saev.data/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/shards/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/shards/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/shuffled/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/shuffled/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/siglip/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/siglip/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/data/transforms/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/data/transforms/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/framework/inference/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/framework/inference/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/framework/saev.framework/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/framework/saev.framework/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/framework/shards/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/framework/shards/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/framework/train/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/framework/train/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/nn/modeling/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/nn/modeling/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/nn/objectives/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/nn/objectives/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/nn/saev.nn/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/nn/saev.nn/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/utils/monitoring/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/utils/monitoring/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/utils/saev.utils/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/utils/saev.utils/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/utils/scheduling/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/utils/scheduling/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/utils/statistics/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/utils/statistics/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/api/utils/wandb/</loc>
+         <loc>https://imageomics.github.io/saev/api/api/utils/wandb/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/developers/contributing/</loc>
+         <loc>https://imageomics.github.io/saev/api/developers/contributing/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/developers/datapoint-init/</loc>
+         <loc>https://imageomics.github.io/saev/api/developers/datapoint-init/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/developers/disk-layout/</loc>
+         <loc>https://imageomics.github.io/saev/api/developers/disk-layout/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/developers/naming/</loc>
+         <loc>https://imageomics.github.io/saev/api/developers/naming/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/developers/protocol/</loc>
+         <loc>https://imageomics.github.io/saev/api/developers/protocol/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/developers/workflows/</loc>
+         <loc>https://imageomics.github.io/saev/api/developers/workflows/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/users/bird-mae-debugging/</loc>
+         <loc>https://imageomics.github.io/saev/api/users/bird-mae-debugging/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/users/glossary/</loc>
+         <loc>https://imageomics.github.io/saev/api/users/glossary/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/users/guide/</loc>
+         <loc>https://imageomics.github.io/saev/api/users/guide/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/users/inference/</loc>
+         <loc>https://imageomics.github.io/saev/api/users/inference/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/users/new-project/</loc>
+         <loc>https://imageomics.github.io/saev/api/users/new-project/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
     <url>
-         <loc>https://osu-nlp-group.github.io/saev/api/users/sweeps/</loc>
+         <loc>https://imageomics.github.io/saev/api/users/sweeps/</loc>
          <lastmod>2026-03-06</lastmod>
     </url>
 </urlset>
\ No newline at end of file
diff --git a/docs/api/users/bird-mae-debugging/index.html b/docs/api/users/bird-mae-debugging/index.html
index ce71365..e945e8a 100644
--- a/docs/api/users/bird-mae-debugging/index.html
+++ b/docs/api/users/bird-mae-debugging/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/users/bird-mae-debugging/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/users/bird-mae-debugging/">
       
       
         <link rel="prev" href="../sweeps/">
@@ -2229,7 +2229,7 @@ <h2 id="lessons">Lessons<a class="headerlink" href="#lessons" title="Permanent l
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/users/glossary/index.html b/docs/api/users/glossary/index.html
index e917ae2..dbda65d 100644
--- a/docs/api/users/glossary/index.html
+++ b/docs/api/users/glossary/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/users/glossary/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/users/glossary/">
       
       
         <link rel="prev" href="../new-project/">
@@ -2048,7 +2048,7 @@ <h1 id="glossary">Glossary<a class="headerlink" href="#glossary" title="Permanen
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/users/guide/index.html b/docs/api/users/guide/index.html
index a9f8f06..e6fa015 100644
--- a/docs/api/users/guide/index.html
+++ b/docs/api/users/guide/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/users/guide/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/users/guide/">
       
       
         <link rel="prev" href="../..">
@@ -2371,7 +2371,7 @@ <h3 id="limitations">Limitations<a class="headerlink" href="#limitations" title=
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/users/inference/index.html b/docs/api/users/inference/index.html
index bb07909..e2a6eb8 100644
--- a/docs/api/users/inference/index.html
+++ b/docs/api/users/inference/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/users/inference/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/users/inference/">
       
       
         <link rel="prev" href="../guide/">
@@ -2046,7 +2046,7 @@
 
 <h1 id="inference">Inference<a class="headerlink" href="#inference" title="Permanent link">&para;</a></h1>
 <blockquote>
-<p>If you want to get started quickly, try the <a href="https://github.com/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb">inference notebook</a> in marimo or on <a href="https://colab.research.google.com/github/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb">Google Colab</a>.</p>
+<p>If you want to get started quickly, try the <a href="https://github.com/Imageomics/saev/blob/main/examples/inference.ipynb">inference notebook</a> in marimo or on <a href="https://colab.research.google.com/github/Imageomics/saev/blob/main/examples/inference.ipynb">Google Colab</a>.</p>
 </blockquote>
 <p>Briefly, you need to:</p>
 <ol>
@@ -2085,16 +2085,16 @@ <h3 id="multi-checkpoint-repos">Multi-checkpoint repos<a class="headerlink" href
 </ul>
 <h2 id="get-the-code">Get the Code<a class="headerlink" href="#get-the-code" title="Permanent link">&para;</a></h2>
 <p>The easiest way to do this is to clone the code:</p>
-<pre><code>git clone https://github.com/OSU-NLP-Group/saev
+<pre><code>git clone https://github.com/Imageomics/saev
 </code></pre>
 <p>You can also install the package from git if you use uv (not sure about pip or cuda):</p>
-<pre><code class="language-sh">uv add git+https://github.com/OSU-NLP-Group/saev
+<pre><code class="language-sh">uv add git+https://github.com/Imageomics/saev
 </code></pre>
 <p>Or clone it and install it as an editable with pip, lik <code>pip install -e .</code> in your virtual environment.</p>
 <p>Then you can do things like <code>from saev import ...</code>.</p>
 <div class="admonition note">
 <p class="admonition-title">Note</p>
-<p>If you struggle to get <code>saev</code> installed, open an issue on <a href="https://github.com/OSU-NLP-Group/saev">GitHub</a> and I will figure out how to make it easier.</p>
+<p>If you struggle to get <code>saev</code> installed, open an issue on <a href="https://github.com/Imageomics/saev">GitHub</a> and I will figure out how to make it easier.</p>
 </div>
 <h2 id="load-the-checkpoint">Load the Checkpoint<a class="headerlink" href="#load-the-checkpoint" title="Permanent link">&para;</a></h2>
 <pre><code class="language-py">import saev.nn
@@ -2234,7 +2234,7 @@ <h2 id="get-activations">Get Activations<a class="headerlink" href="#get-activat
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/users/new-project/index.html b/docs/api/users/new-project/index.html
index 9be1ba2..ffbab7b 100644
--- a/docs/api/users/new-project/index.html
+++ b/docs/api/users/new-project/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/users/new-project/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/users/new-project/">
       
       
         <link rel="prev" href="../inference/">
@@ -2085,7 +2085,7 @@ <h1 id="new-project-structure">New Project Structure<a class="headerlink" href="
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/api/users/sweeps/index.html b/docs/api/users/sweeps/index.html
index c9ca900..35da9bd 100644
--- a/docs/api/users/sweeps/index.html
+++ b/docs/api/users/sweeps/index.html
@@ -8,7 +8,7 @@
       
       
       
-        <link rel="canonical" href="https://osu-nlp-group.github.io/saev/api/users/sweeps/">
+        <link rel="canonical" href="https://imageomics.github.io/saev/api/users/sweeps/">
       
       
         <link rel="prev" href="../glossary/">
@@ -2405,7 +2405,7 @@ <h2 id="examples">Examples<a class="headerlink" href="#examples" title="Permanen
       
       
     
-    <a href="https://github.com/OSU-NLP-Group/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
+    <a href="https://github.com/Imageomics/saev" target="_blank" rel="noopener" title="github.com" class="md-social__link">
       <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
     </a>
   
diff --git a/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md b/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md
index cc5637f..da58371 100644
--- a/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md
+++ b/docs/assets/modelcards/SAE_BioCLIP_24K_ViT-B-16_iNat21.md
@@ -4,15 +4,15 @@ license: mit
 
 # SAE for Imageomics's BioCLIP ViT-B/16 trained on iNat2021 Activations
 
-![Overview of a the features found by a BioCLIP-trained SAE](https://osu-nlp-group.github.io/saev/assets/overview2-bioclip.webp)
+![Overview of the features found by a BioCLIP-trained SAE](https://imageomics.github.io/saev/assets/overview2-bioclip.webp)
 
-* **Homepage:** https://osu-nlp-group.github.io/saev
-* **Code:** https://github.com/OSU-NLP-Group/saev
+* **Homepage:** https://imageomics.github.io/saev
+* **Code:** https://github.com/Imageomics/saev
 * **Preprint:** https://arxiv.org/abs/2502.06755
-* **Demos:** https://osu-nlp-group.github.io/saev#demos
+* **Demos:** https://imageomics.github.io/saev#demos
 * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu)
 
 ## Inference Instructions
 
-Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions).
+Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions).
 
diff --git a/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md b/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md
index e55df03..7ecac82 100644
--- a/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md
+++ b/docs/assets/modelcards/SAE_CLIP_24K_ViT-B-16_IN1K.md
@@ -4,14 +4,14 @@ license: mit
 
 # SAE for OpenAI's CLIP ViT-B/16 trained on ImageNet-1K Activations
 
-![Overview of a CLIP-trained SAE](https://osu-nlp-group.github.io/saev/assets/overview2.webp)
+![Overview of a CLIP-trained SAE](https://imageomics.github.io/saev/assets/overview2.webp)
 
-* **Homepage:** https://osu-nlp-group.github.io/saev
-* **Code:** https://github.com/OSU-NLP-Group/saev
+* **Homepage:** https://imageomics.github.io/saev
+* **Code:** https://github.com/Imageomics/saev
 * **Preprint:** https://arxiv.org/abs/2502.06755
-* **Demos:** https://osu-nlp-group.github.io/saev#demos
+* **Demos:** https://imageomics.github.io/saev#demos
 * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu)
 
 ## Inference Instructions
 
-Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions).
+Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions).
diff --git a/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md b/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md
index 1c86474..11b772b 100644
--- a/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md
+++ b/docs/assets/modelcards/SAE_DINOv2_24K_ViT-B-14_IN1K.md
@@ -4,12 +4,12 @@ license: mit
 
 # SAE for Meta's DINOv2 ViT-B/14 trained on ImageNet-1K Activations
 
-* **Homepage:** https://osu-nlp-group.github.io/saev
-* **Code:** https://github.com/OSU-NLP-Group/saev
+* **Homepage:** https://imageomics.github.io/saev
+* **Code:** https://github.com/Imageomics/saev
 * **Preprint:** https://arxiv.org/abs/2502.06755
-* **Demos:** https://osu-nlp-group.github.io/saev#demos
+* **Demos:** https://imageomics.github.io/saev#demos
 * **Point of Contact:** [Sam Stevens](mailto:stevens.994@buckeyemail.osu.edu)
 
 ## Inference Instructions
 
-Follow the instructions [here](https://osu-nlp-group.github.io/saev/api/saev/#inference-instructions).
+Follow the instructions [here](https://imageomics.github.io/saev/api/saev/#inference-instructions).
diff --git a/docs/demos/classification/dist/app.js b/docs/demos/classification/dist/app.js
index 38025e0..94d05be 100644
--- a/docs/demos/classification/dist/app.js
+++ b/docs/demos/classification/dist/app.js
@@ -8416,7 +8416,7 @@ var $author$project$Classification$explainGradioError = function (err) {
 		$elm$html$Html$a,
 		_List_fromArray(
 			[
-				$elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'),
+				$elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'),
 				$elm$html$Html$Attributes$class('text-sky-500 hover:underline')
 			]),
 		_List_fromArray(
diff --git a/docs/demos/semseg/dist/app.js b/docs/demos/semseg/dist/app.js
index 2ffc1e3..5f445f8 100644
--- a/docs/demos/semseg/dist/app.js
+++ b/docs/demos/semseg/dist/app.js
@@ -8422,7 +8422,7 @@ var $author$project$Semseg$explainGradioError = function (err) {
 		$elm$html$Html$a,
 		_List_fromArray(
 			[
-				$elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'),
+				$elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'),
 				$elm$html$Html$Attributes$class('text-sky-500 hover:underline')
 			]),
 		_List_fromArray(
diff --git a/docs/index.html b/docs/index.html
index 93e9f07..a9277c1 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -33,7 +33,7 @@ <h1>Sparse Autoencoders for Scientifically Rigorous Interpretation of Vision Mod
       <a href="mailto:stevens.994@buckeyemail.osu.edu">stevens.994@osu.edu</a>
     </p>
     <p class="centered">
-      <a class="pill-button" href="https://github.com/osu-nlp-group/saev">
+      <a class="pill-button" href="https://github.com/Imageomics/saev">
         <img src="assets/icons/github.svg" /> Code
       </a>
       <a class="pill-button" href="https://huggingface.co/collections/osunlp/sae-v-67ab8c4fdf179d117db28195">
@@ -75,7 +75,7 @@ <h2><code>saev</code></h2>
       It also includes some interactive demos for scientifically rigorous interpretation of ViTs.
     </p>
     <p>
-      API reference docs are available below, as well as the <a href="https://github.com/osu-nlp-group/saev">source code on GitHub</a>.
+      API reference docs are available below, as well as the <a href="https://github.com/Imageomics/saev">source code on GitHub</a>.
     </p>
     <h2>API Docs</h2>
     <p>
@@ -94,7 +94,7 @@ <h2>References &amp; Citations</h2>
     title = {{saev}}, 
     author = {Stevens, Samuel and Wei-Lun Chao and Tanya Berger-Wolf and Yu Su},
     license = {MIT},
-    url = {https://github.com/osu-nlp-group/saev}
+    url = {https://github.com/Imageomics/saev}
 }</pre>
     <p>
       Preprint:
diff --git a/docs/internal/archive/reports/2025-10-03/report.typ b/docs/internal/archive/reports/2025-10-03/report.typ
index c9f8893..f48b1a4 100644
--- a/docs/internal/archive/reports/2025-10-03/report.typ
+++ b/docs/internal/archive/reports/2025-10-03/report.typ
@@ -62,7 +62,7 @@ I haven't tried it yet because of some other blockers (see below).
 
 = Refactors
 
-+ #link("https://osu-nlp-group.github.io/saev/api/")[User-facing documentation].
++ #link("https://imageomics.github.io/saev/api/")[User-facing documentation].
 + [in progress] Update disk layout to make it easier to refer from a particular run to the original sharded activations and the image dataset on disk.
 + [in progress] Removing references to images and vision transformers in order to support non-vision but still bi-directional transformers, like audio or other modalities.
 
diff --git a/docs/internal/handoff/main.typ b/docs/internal/handoff/main.typ
index 08e635a..c3b40b3 100644
--- a/docs/internal/handoff/main.typ
+++ b/docs/internal/handoff/main.typ
@@ -201,7 +201,7 @@ Three risks that could stall or kill the project:
 
 = Infrastructure and Data
 
-- *Code:* Currently under the `OSU-NLP-Group` GitHub org. Needs discussion with Tanya about whether to transfer to the Imageomics GitHub org.
+- *Code:* #strike[Currently under the `OSU-NLP-Group` GitHub org. Needs discussion with Tanya about whether to transfer to] Now under the Imageomics GitHub org.
 - *Cluster data:* Trained checkpoints, activation caches, and intermediate results live on shared cluster scratch/project space (e.g., `/fs/scratch/PAS2136/`). This data persists but is regenerable from the configs if lost.
 - *No personal storage risk:* All important data is on shared infrastructure, not personal directories.
 
diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index d80bc4e..32eb583 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -1,7 +1,7 @@
 site_name: saev
 docs_dir: src
 site_dir: api
-site_url: https://osu-nlp-group.github.io/saev/api
+site_url: https://imageomics.github.io/saev/api
 
 theme:
   name: material
@@ -30,7 +30,7 @@ extra_css:
 extra:
   social:
     - icon: fontawesome/brands/github
-      link: https://github.com/OSU-NLP-Group/saev
+      link: https://github.com/Imageomics/saev
 
 nav:
   - Home: index.md
diff --git a/docs/src/index.md b/docs/src/index.md
index 8c41e9c..3cb2fff 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -2,7 +2,7 @@
 
 ![PyPI Downloads](https://static.pepy.tech/badge/saev)
 ![MIT License](https://img.shields.io/badge/License-MIT-efefef)
-![GitHub Repo stars](https://img.shields.io/github/stars/OSU-NLP-group/saev?style=flat&label=GitHub%20%E2%AD%90)
+![GitHub Repo stars](https://img.shields.io/github/stars/Imageomics/saev?style=flat&label=GitHub%20%E2%AD%90)
 
 saev is a framework for training and evaluating **S**parse **a**uto**e**ncoders (SAEs) for **v**ision transformers (ViTs), implemented in PyTorch.
 
diff --git a/docs/src/users/inference.md b/docs/src/users/inference.md
index 718ac1c..642bb44 100644
--- a/docs/src/users/inference.md
+++ b/docs/src/users/inference.md
@@ -1,6 +1,6 @@
 # Inference
 
-> If you want to get started quickly, try the [inference notebook](https://github.com/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb) in marimo or on [Google Colab](https://colab.research.google.com/github/OSU-NLP-Group/saev/blob/main/examples/inference.ipynb).
+> If you want to get started quickly, try the [inference notebook](https://github.com/Imageomics/saev/blob/main/examples/inference.ipynb) in marimo or on [Google Colab](https://colab.research.google.com/github/Imageomics/saev/blob/main/examples/inference.ipynb).
 
 Briefly, you need to:
 
@@ -58,13 +58,13 @@ Available DINOv3 repos:
 The easiest way to do this is to clone the code:
 
 ```
-git clone https://github.com/OSU-NLP-Group/saev
+git clone https://github.com/Imageomics/saev
 ```
 
 You can also install the package from git if you use uv (not sure about pip or cuda):
 
 ```sh
-uv add git+https://github.com/OSU-NLP-Group/saev
+uv add git+https://github.com/Imageomics/saev
 ```
 
 Or clone it and install it as an editable with pip, lik `pip install -e .` in your virtual environment.
@@ -73,7 +73,7 @@ Then you can do things like `from saev import ...`.
 
 !!! note
 
-    If you struggle to get `saev` installed, open an issue on [GitHub](https://github.com/OSU-NLP-Group/saev) and I will figure out how to make it easier.
+    If you struggle to get `saev` installed, open an issue on [GitHub](https://github.com/Imageomics/saev) and I will figure out how to make it easier.
 
 ## Load the Checkpoint
 
diff --git a/examples/inference.ipynb b/examples/inference.ipynb
index c7d6e22..f46a1ec 100644
--- a/examples/inference.ipynb
+++ b/examples/inference.ipynb
@@ -12,7 +12,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "62f8da60",
    "metadata": {
     "execution": {
@@ -136,24 +136,24 @@
      "output_type": "stream",
      "text": [
       "  Downloading duckdb-1.4.4-cp39-cp39-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl (20.4 MB)\r\n",
-      "\u001b[?25l\r",
-      "\u001b[K     |                                | 10 kB 23.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |                                | 20 kB 6.5 MB/s eta 0:00:04\r",
-      "\u001b[K     |                                | 30 kB 4.7 MB/s eta 0:00:05\r",
-      "\u001b[K     |                                | 40 kB 4.3 MB/s eta 0:00:05\r",
-      "\u001b[K     |                                | 51 kB 4.3 MB/s eta 0:00:05\r",
-      "\u001b[K     |                                | 61 kB 3.1 MB/s eta 0:00:07\r",
-      "\u001b[K     |▏                               | 71 kB 3.5 MB/s eta 0:00:06\r",
-      "\u001b[K     |▏                               | 81 kB 4.0 MB/s eta 0:00:06\r",
-      "\u001b[K     |▏                               | 92 kB 4.5 MB/s eta 0:00:05\r",
-      "\u001b[K     |▏                               | 102 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▏                               | 112 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▏                               | 122 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▏                               | 133 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▎                               | 143 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▎                               | 153 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▎                               | 163 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▎                               | 174 kB 4.8 MB/s eta 0:00:05\r",
+      "\u001b[?25l\r\n",
+      "\u001b[K     |                                | 10 kB 23.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |                                | 20 kB 6.5 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |                                | 30 kB 4.7 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |                                | 40 kB 4.3 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |                                | 51 kB 4.3 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |                                | 61 kB 3.1 MB/s eta 0:00:07\r\n",
+      "\u001b[K     |▏                               | 71 kB 3.5 MB/s eta 0:00:06\r\n",
+      "\u001b[K     |▏                               | 81 kB 4.0 MB/s eta 0:00:06\r\n",
+      "\u001b[K     |▏                               | 92 kB 4.5 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▏                               | 102 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▏                               | 112 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▏                               | 122 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▏                               | 133 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▎                               | 143 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▎                               | 153 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▎                               | 163 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▎                               | 174 kB 4.8 MB/s eta 0:00:05\r\n",
       "\u001b[K     |▎                               | 184 kB 4.8 MB/s eta 0:00:05"
      ]
     },
@@ -161,384 +161,384 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "\u001b[K     |▎                               | 194 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▎                               | 204 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 215 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 225 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 235 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 245 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 256 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 266 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▍                               | 276 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 286 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 296 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 307 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 317 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 327 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 337 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▌                               | 348 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 358 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 368 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 378 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 389 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 399 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 409 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▋                               | 419 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 430 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 440 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 450 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 460 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 471 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 481 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▊                               | 491 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 501 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 512 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 522 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 532 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 542 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 552 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |▉                               | 563 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 573 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 583 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 593 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 604 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 614 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 624 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 634 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 645 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 655 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 665 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 675 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 686 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 696 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█                               | 706 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 716 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 727 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 737 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 747 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 757 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 768 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▏                              | 778 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▎                              | 788 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▎                              | 798 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▎                              | 808 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▎                              | 819 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▎                              | 829 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▎                              | 839 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 849 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 860 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 870 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 880 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 890 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 901 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▍                              | 911 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 921 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 931 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 942 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 952 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 962 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 972 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▌                              | 983 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 993 kB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▋                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.1 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:05\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▍                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▋                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▋                             | 1.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▊                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███                             | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▌                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███▉                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▎                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▋                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████                           | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▎                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▎                          | 3.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▍                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▊                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▏                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▍                         | 4.0 MB 4.8 MB/s eta 0:00:04\r",
+      "\r\n",
+      "\u001b[K     |▎                               | 194 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▎                               | 204 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 215 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 225 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 235 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 245 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 256 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 266 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▍                               | 276 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 286 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 296 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 307 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 317 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 327 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 337 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▌                               | 348 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 358 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 368 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 378 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 389 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 399 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 409 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▋                               | 419 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 430 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 440 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 450 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 460 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 471 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 481 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▊                               | 491 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 501 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 512 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 522 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 532 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 542 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 552 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |▉                               | 563 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 573 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 583 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 593 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 604 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 614 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 624 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 634 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 645 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 655 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 665 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 675 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 686 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 696 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█                               | 706 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 716 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 727 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 737 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 747 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 757 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 768 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▏                              | 778 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▎                              | 788 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▎                              | 798 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▎                              | 808 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▎                              | 819 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▎                              | 829 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▎                              | 839 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 849 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 860 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 870 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 880 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 890 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 901 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▍                              | 911 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 921 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 931 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 942 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 952 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 962 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 972 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▌                              | 983 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 993 kB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 1.0 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▋                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▊                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.1 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |█▉                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.2 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██                              | 1.3 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:05\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▏                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▎                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▍                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▌                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▋                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▋                             | 1.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▋                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▊                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██▉                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 1.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███                             | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▏                            | 2.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▎                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▍                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▌                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▋                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▊                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███▉                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████                            | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▏                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▎                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▍                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▌                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 2.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▋                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▊                           | 3.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████▉                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████                           | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▏                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▎                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▎                          | 3.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▎                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▍                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▌                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▋                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▊                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████▉                          | 3.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████                          | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 3.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▏                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▎                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▍                         | 4.0 MB 4.8 MB/s eta 0:00:04\r\n",
       "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04"
      ]
     },
@@ -546,643 +546,643 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▌                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▎                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▊                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████                        | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.2 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▍                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.5 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |████████▉                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▏                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 5.9 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.0 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r",
-      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▋                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▍                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▋                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▋                     | 6.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▊                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 6.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████                     | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▌                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████▉                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▎                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 7.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▋                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████                   | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▎                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▎                  | 8.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▍                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▊                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▏                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▌                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▎                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 9.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▊                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.1 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████                | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.3 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▍               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
+      "\r\n",
+      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▍                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▌                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▋                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▊                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |██████▉                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████                         | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▏                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▎                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▍                        | 4.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▌                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▋                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 4.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▊                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |███████▉                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████                        | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▏                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.2 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▎                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.3 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▍                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▌                       | 5.4 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▋                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.5 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▊                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.6 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |████████▉                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.7 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████                       | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.8 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▏                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▎                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 5.9 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▍                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.0 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▌                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:04\r\n",
+      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▋                      | 6.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▋                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▊                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████▉                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████                      | 6.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▏                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▎                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▍                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▌                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▋                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▋                     | 6.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▋                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▊                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████▉                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 6.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████                     | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▏                    | 7.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▎                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▍                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▌                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▋                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▊                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████▉                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████                    | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▏                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▎                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▍                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 7.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▌                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▋                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▊                   | 8.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████▉                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████                   | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▏                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▎                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▎                  | 8.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▎                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▍                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▌                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▋                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▊                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████▉                  | 8.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 8.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████                  | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▏                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▎                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▍                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▌                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▋                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▊                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |██████████████▉                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████                 | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▏                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▎                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▍                | 9.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▌                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 9.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▋                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.0 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▊                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |███████████████▉                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.1 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.2 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████                | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▏               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.3 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▎               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.4 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▍               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▌               | 10.5 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
       "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03"
      ]
     },
@@ -1190,406 +1190,406 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.6 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |████████████████▉               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r",
-      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▏              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▋              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▍             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▋             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▋             | 11.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▊             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████             | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▌            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▉            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████▉            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▎           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▋           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████           | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▍          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▊          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▏         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▌         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▌         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
+      "\r\n",
+      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▋               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.6 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▊               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.7 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |████████████████▉               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.8 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:03\r\n",
+      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████               | 10.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 10.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▏              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▎              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▍              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▌              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▋              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▊              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████▉              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████              | 11.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▏             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▎             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▍             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▌             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▋             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▋             | 11.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▋             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 11.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▊             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████▉             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████             | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▏            | 12.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▎            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▍            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▌            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▋            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▊            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▉            | 12.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▉            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████▉            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████            | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▏           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 12.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▎           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▍           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▌           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▋           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▊           | 13.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████▉           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████           | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▏          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▎          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▍          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▌          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▋          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▊          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |█████████████████████▉          | 13.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████          | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▏         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▎         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▍         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▌         | 14.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▌         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▌         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▋         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▊         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |██████████████████████▉         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
       "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02"
      ]
     },
@@ -1597,418 +1597,418 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.7 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▎        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.0 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▊        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.2 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████        | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.4 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▍       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r",
-      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████▉       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▏      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▏      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▋      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▍     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 16.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▊     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████     | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▌    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▉    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▉    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 17.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▎   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▋   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████   | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▍  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▋  | 18.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
+      "\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████         | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.7 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▏        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.8 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▎        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▍        | 14.9 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▌        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.0 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▋        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.1 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▊        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |███████████████████████▉        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.2 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.3 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████        | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▏       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.4 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▎       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.5 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▍       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:02\r\n",
+      "\u001b[K     |████████████████████████▌       | 15.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▋       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▊       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████▉       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 15.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▏      | 16.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▏      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▏      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▍      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▌      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▋      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▊      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████      | 16.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▏     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▎     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▍     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▌     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 16.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▋     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▊     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████▉     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████     | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▏    | 17.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▎    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▌    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▋    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▊    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▉    | 17.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▉    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▉    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████    | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 17.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▏   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▎   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▍   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▌   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▋   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▊   | 18.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████▉   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████   | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▏  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▍  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▋  | 18.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
       "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01"
      ]
     },
@@ -2016,152 +2016,152 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
-      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▊  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.5 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.0 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▊| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r",
+      "\r\n",
+      "\u001b[K     |█████████████████████████████▋  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 18.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▊  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▉  | 19.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████  | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▎ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▍ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▌ | 19.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▌ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▋ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.5 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▊ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.6 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▉ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.7 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████ | 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.8 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▏| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 19.9 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▎| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▍| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.0 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▌| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.1 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▋| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.2 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▊| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████████▉| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████████| 20.3 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01\r\n",
       "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s eta 0:00:01"
      ]
     },
@@ -2169,7 +2169,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
+      "\r\n",
       "\u001b[K     |████████████████████████████████| 20.4 MB 4.8 MB/s \r\n"
      ]
     },
@@ -2179,14 +2179,14 @@
      "text": [
       "\u001b[?25hCollecting tqdm>=4.66.5\r\n",
       "  Downloading tqdm-4.67.3-py3-none-any.whl (78 kB)\r\n",
-      "\u001b[?25l\r",
-      "\u001b[K     |████▏                           | 10 kB 37.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████▍                       | 20 kB 52.7 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████▌                   | 30 kB 68.0 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████▊               | 40 kB 77.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████           | 51 kB 84.9 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████       | 61 kB 93.1 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▎  | 71 kB 12.4 MB/s eta 0:00:01\r",
+      "\u001b[?25l\r\n",
+      "\u001b[K     |████▏                           | 10 kB 37.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████▍                       | 20 kB 52.7 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████▌                   | 30 kB 68.0 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████▊               | 40 kB 77.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████           | 51 kB 84.9 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████       | 61 kB 93.1 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▎  | 71 kB 12.4 MB/s eta 0:00:01\r\n",
       "\u001b[K     |████████████████████████████████| 78 kB 6.4 MB/s \r\n"
      ]
     },
@@ -2196,21 +2196,21 @@
      "text": [
       "\u001b[?25hCollecting psutil>=6.1.0\r\n",
       "  Downloading psutil-7.2.2-cp36-abi3-manylinux2010_x86_64.manylinux_2_12_x86_64.manylinux_2_28_x86_64.whl (155 kB)\r\n",
-      "\u001b[?25l\r",
-      "\u001b[K     |██                              | 10 kB 1.7 MB/s eta 0:00:01\r",
-      "\u001b[K     |████▏                           | 20 kB 3.4 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████▎                         | 30 kB 5.0 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████▍                       | 40 kB 6.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████▌                     | 51 kB 8.2 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████▋                   | 61 kB 9.7 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████▊                 | 71 kB 11.2 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████████████▉               | 81 kB 12.7 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████             | 92 kB 14.2 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████           | 102 kB 15.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████▏        | 112 kB 15.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▎      | 122 kB 15.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |███████████████████████████▍    | 133 kB 15.6 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████████▌  | 143 kB 15.6 MB/s eta 0:00:01\r",
+      "\u001b[?25l\r\n",
+      "\u001b[K     |██                              | 10 kB 1.7 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████▏                           | 20 kB 3.4 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████▎                         | 30 kB 5.0 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████▍                       | 40 kB 6.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████▌                     | 51 kB 8.2 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████▋                   | 61 kB 9.7 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████▊                 | 71 kB 11.2 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████████████▉               | 81 kB 12.7 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████             | 92 kB 14.2 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████           | 102 kB 15.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████▏        | 112 kB 15.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▎      | 122 kB 15.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |███████████████████████████▍    | 133 kB 15.6 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████████▌  | 143 kB 15.6 MB/s eta 0:00:01\r\n",
       "\u001b[K     |███████████████████████████████▋| 153 kB 15.6 MB/s eta 0:00:01"
      ]
     },
@@ -2218,7 +2218,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "\r",
+      "\r\n",
       "\u001b[K     |████████████████████████████████| 155 kB 15.6 MB/s \r\n",
       "\u001b[?25hCollecting submitit>=1.5.2\r\n"
      ]
@@ -2228,14 +2228,14 @@
      "output_type": "stream",
      "text": [
       "  Downloading submitit-1.5.4-py3-none-any.whl (76 kB)\r\n",
-      "\u001b[?25l\r",
-      "\u001b[K     |████▎                           | 10 kB 45.3 MB/s eta 0:00:01\r",
-      "\u001b[K     |████████▋                       | 20 kB 65.5 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████                   | 30 kB 82.9 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████▎              | 40 kB 92.5 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████▌          | 51 kB 99.9 MB/s eta 0:00:01\r",
-      "\u001b[K     |█████████████████████████▉      | 61 kB 108.5 MB/s eta 0:00:01\r",
-      "\u001b[K     |██████████████████████████████▏ | 71 kB 112.0 MB/s eta 0:00:01\r",
+      "\u001b[?25l\r\n",
+      "\u001b[K     |████▎                           | 10 kB 45.3 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |████████▋                       | 20 kB 65.5 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████                   | 30 kB 82.9 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████▎              | 40 kB 92.5 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████▌          | 51 kB 99.9 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |█████████████████████████▉      | 61 kB 108.5 MB/s eta 0:00:01\r\n",
+      "\u001b[K     |██████████████████████████████▏ | 71 kB 112.0 MB/s eta 0:00:01\r\n",
       "\u001b[K     |████████████████████████████████| 76 kB 2.0 MB/s \r\n"
      ]
     },
@@ -2256,7 +2256,7 @@
     }
    ],
    "source": [
-    "!pip install \"saev @ git+https://github.com/OSU-NLP-Group/saev@6d6eff52c4ae04f5153badc0a553adddc8d3e3cc\""
+    "!pip install \"saev @ git+https://github.com/Imageomics/saev@6d6eff52c4ae04f5153badc0a553adddc8d3e3cc\""
    ]
   },
   {
diff --git a/pyproject.toml b/pyproject.toml
index a511a49..05b0da8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,8 +38,8 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 
 [project.urls]
-Homepage = "https://osu-nlp-group.github.io/saev"
-Issues = "https://github.com/OSU-NLP-Group/saev/issues"
+Homepage = "https://imageomics.github.io/saev"
+Issues = "https://github.com/Imageomics/saev/issues"
 
 [tool.ruff]
 extend-exclude = ["tests/sweeps"]
diff --git a/scripts/export_notebook.py b/scripts/export_notebook.py
index 4c446fe..2411ee0 100644
--- a/scripts/export_notebook.py
+++ b/scripts/export_notebook.py
@@ -11,7 +11,7 @@
 ROOT = Path(__file__).resolve().parent.parent
 MARIMO_SRC = ROOT / "examples" / "inference.py"
 IPYNB_DST = ROOT / "examples" / "inference.ipynb"
-REPO = "OSU-NLP-Group/saev"
+REPO = "Imageomics/saev"
 
 
 def get_commit_hash() -> str:
diff --git a/scripts/push_models.py b/scripts/push_models.py
index 1d10d38..559ab2d 100644
--- a/scripts/push_models.py
+++ b/scripts/push_models.py
@@ -1,5 +1,5 @@
-repo_url = "https://github.com/OSU-NLP-Group/saev"
-docs_url = "https://osu-nlp-group.github.io/saev"
+repo_url = "https://github.com/Imageomics/saev"
+docs_url = "https://imageomics.github.io/saev"
 
 
 def main(
diff --git a/src/web/apps/classification/dist/app.js b/src/web/apps/classification/dist/app.js
index 38025e0..94d05be 100644
--- a/src/web/apps/classification/dist/app.js
+++ b/src/web/apps/classification/dist/app.js
@@ -8416,7 +8416,7 @@ var $author$project$Classification$explainGradioError = function (err) {
 		$elm$html$Html$a,
 		_List_fromArray(
 			[
-				$elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'),
+				$elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'),
 				$elm$html$Html$Attributes$class('text-sky-500 hover:underline')
 			]),
 		_List_fromArray(
diff --git a/src/web/apps/semseg/dist/app.js b/src/web/apps/semseg/dist/app.js
index 2ffc1e3..5f445f8 100644
--- a/src/web/apps/semseg/dist/app.js
+++ b/src/web/apps/semseg/dist/app.js
@@ -8422,7 +8422,7 @@ var $author$project$Semseg$explainGradioError = function (err) {
 		$elm$html$Html$a,
 		_List_fromArray(
 			[
-				$elm$html$Html$Attributes$href('https://github.com/OSU-NLP-Group/saev/issues/new'),
+				$elm$html$Html$Attributes$href('https://github.com/Imageomics/saev/issues/new'),
 				$elm$html$Html$Attributes$class('text-sky-500 hover:underline')
 			]),
 		_List_fromArray(
diff --git a/src/web/src/Classification.elm b/src/web/src/Classification.elm
index 6283a74..b7eb7eb 100644
--- a/src/web/src/Classification.elm
+++ b/src/web/src/Classification.elm
@@ -536,7 +536,7 @@ explainGradioError err =
     let
         githubLink =
             Html.a
-                [ Html.Attributes.href "https://github.com/OSU-NLP-Group/saev/issues/new"
+                [ Html.Attributes.href "https://github.com/Imageomics/saev/issues/new"
                 , class "text-sky-500 hover:underline"
                 ]
                 [ Html.text "GitHub" ]
diff --git a/src/web/src/Semseg.elm b/src/web/src/Semseg.elm
index b0e092d..0b7bc4a 100644
--- a/src/web/src/Semseg.elm
+++ b/src/web/src/Semseg.elm
@@ -507,7 +507,7 @@ explainGradioError err =
     let
         githubLink =
             Html.a
-                [ Html.Attributes.href "https://github.com/OSU-NLP-Group/saev/issues/new"
+                [ Html.Attributes.href "https://github.com/Imageomics/saev/issues/new"
                 , class "text-sky-500 hover:underline"
                 ]
                 [ Html.text "GitHub" ]

From 01cb2acf959967acb063576581527d61ab78348b Mon Sep 17 00:00:00 2001
From: egrace479 <e.campolongo479@gmail.com>
Date: Tue, 9 Jun 2026 10:56:37 -0400
Subject: [PATCH 3/9] Clarify license @samuelstevens first commit:
 a0f8528474f4f19c6a292f01ee1544ed03ff1f22

---
 CITATION.cff | 2 +-
 LICENSE      | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CITATION.cff b/CITATION.cff
index d09bd8c..e3af6c7 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -23,7 +23,7 @@ keywords:
   - sparse autoencoders
   - interpretability
   - computer vision
-license: CC-BY-4.0
+license: MIT
 commit: 6a34b6916fda7b04cc2d89749b3ad6425a8f39e6
 date-released: '2025-11-16'
 
diff --git a/LICENSE b/LICENSE
index 2968c24..15ec217 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,5 +1,7 @@
 MIT License
 
+Copyright (c) 2024-2026 Samuel Stevens
+
 Copyright (c) 2023 Joseph Bloom
 
 Permission is hereby granted, free of charge, to any person obtaining a copy

From 12bce2eeae89ccb4fa3a896e7d5ce03de65183e3 Mon Sep 17 00:00:00 2001
From: egrace479 <e.campolongo479@gmail.com>
Date: Tue, 9 Jun 2026 11:16:23 -0400
Subject: [PATCH 4/9] Add provenance files for Zenodo snapshot

---
 .github/workflows/validate-zenodo.yaml | 23 ++++++++++++++++
 .zenodo.json                           | 37 ++++++++++++++++++++++++++
 CITATION.cff                           | 13 ++++++---
 3 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/validate-zenodo.yaml
 create mode 100644 .zenodo.json

diff --git a/.github/workflows/validate-zenodo.yaml b/.github/workflows/validate-zenodo.yaml
new file mode 100644
index 0000000..838b1fa
--- /dev/null
+++ b/.github/workflows/validate-zenodo.yaml
@@ -0,0 +1,23 @@
+name: Check Zenodo metadata
+
+on:
+  push:
+    paths:
+      - '.zenodo.json'
+      - '.github/workflows/validate-zenodo.yaml'
+
+jobs:
+  check-zenodo-metadata:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v6
+      - uses: actions/setup-node@v6
+        with:
+          node-version: '24'
+      - name: Install dependencies
+        run: npm install zenodraft@0.14.1
+      - name: Check .zenodo.json file
+        run: |
+          npx zenodraft metadata validate .zenodo.json
diff --git a/.zenodo.json b/.zenodo.json
new file mode 100644
index 0000000..d4ae2b4
--- /dev/null
+++ b/.zenodo.json
@@ -0,0 +1,37 @@
+{
+    "creators": [
+      {
+        "name": "Stevens, Samuel",
+        "orcid": "https://orcid.org/0009-0000-9493-7766",
+        "affiliation": "The Ohio State University"
+      }
+    ],
+    "description": "saev is a package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.",
+    "keywords": [
+      "imageomics",
+      "sparse autoencoders",
+      "interpretability",
+      "computer vision"
+    ],
+    "title": "saev: Sparse Autoencoders for Vision Transformers",
+    "version": "0.1.0",
+    "license": "MIT",
+    "publication_date": "2026-06-10",
+    "grants": [
+        {
+            "id": "021nxhr62::2118240"
+        }
+    ],
+    "related_identifiers": [
+        {
+            "identifier": "10.48550/arXiv.2502.06755",
+            "relation": "isSupplementTo",
+            "resource_type": "publication-preprint"
+        },
+        {
+            "identifier": "10.48550/arXiv.2511.17735",
+            "relation": "isSupplementTo",
+            "resource_type": "publication-preprint"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/CITATION.cff b/CITATION.cff
index e3af6c7..e716abc 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -24,6 +24,13 @@ keywords:
   - interpretability
   - computer vision
 license: MIT
-commit: 6a34b6916fda7b04cc2d89749b3ad6425a8f39e6
-date-released: '2025-11-16'
-
+date-released: '2026-06-10'
+identifiers:
+  - description: "The GitHub release URL of tag v0.1.0."
+    type: url
+    value: "https://github.com/Imageomics/saev/releases/tag/v0.1.0"
+  - description: "The GitHub URL of the commit tagged with v0.1.0."
+    type: url
+    value: "https://github.com/Imageomics/saev/tree/<commit-hash>" # Update on release
+version: 0.1.0
+#doi: <version-agnostic DOI from Zenodo>

From 7155c39032cb047f9d606b4fe5c68b37ad220c81 Mon Sep 17 00:00:00 2001
From: Matthew Thompson <thompson.4509@osu.edu>
Date: Fri, 12 Jun 2026 12:14:13 -0400
Subject: [PATCH 5/9] Update docs build

---
 docs/api/api/framework/inference/index.html |   4 +-
 docs/api/api/framework/train/index.html     |  32 +++---
 docs/api/api/nn/modeling/index.html         | 110 +++++++++++---------
 docs/api/api/nn/saev.nn/index.html          | 110 +++++++++++---------
 docs/api/api/utils/wandb/index.html         |  14 ++-
 docs/api/developers/protocol/index.html     |   5 +-
 docs/api/search/search_index.json           |   2 +-
 docs/api/sitemap.xml                        |  98 ++++++++---------
 docs/api/sitemap.xml.gz                     | Bin 556 -> 553 bytes
 docs/api/users/guide/index.html             |   2 +-
 docs/api/users/sweeps/index.html            |   1 +
 11 files changed, 209 insertions(+), 169 deletions(-)

diff --git a/docs/api/api/framework/inference/index.html b/docs/api/api/framework/inference/index.html
index 40d0505..914cb00 100644
--- a/docs/api/api/framework/inference/index.html
+++ b/docs/api/api/framework/inference/index.html
@@ -2647,7 +2647,8 @@ <h2 id="saev.framework.inference.main" class="doc doc-heading">
 <span class="normal">360</span>
 <span class="normal">361</span>
 <span class="normal">362</span>
-<span class="normal">363</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">363</span>
+<span class="normal">364</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span>
     <span class="n">cfg</span><span class="p">:</span> <span class="n">tp</span><span class="o">.</span><span class="n">Annotated</span><span class="p">[</span><span class="n">Config</span><span class="p">,</span> <span class="n">tyro</span><span class="o">.</span><span class="n">conf</span><span class="o">.</span><span class="n">arg</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">)],</span> <span class="n">sweep</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span>
 <span class="p">):</span>
@@ -2691,6 +2692,7 @@ <h2 id="saev.framework.inference.main" class="doc doc-heading">
     <span class="n">executor</span> <span class="o">=</span> <span class="n">submitit</span><span class="o">.</span><span class="n">SlurmExecutor</span><span class="p">(</span><span class="n">folder</span><span class="o">=</span><span class="n">cfg</span><span class="o">.</span><span class="n">log_to</span><span class="p">)</span>
 
     <span class="n">executor</span><span class="o">.</span><span class="n">update_parameters</span><span class="p">(</span>
+        <span class="n">job_name</span><span class="o">=</span><span class="s2">&quot;sae-inference&quot;</span><span class="p">,</span>
         <span class="n">time</span><span class="o">=</span><span class="nb">int</span><span class="p">(</span><span class="n">cfg</span><span class="o">.</span><span class="n">n_hours</span> <span class="o">*</span> <span class="mi">60</span><span class="p">),</span>
         <span class="n">partition</span><span class="o">=</span><span class="n">cfg</span><span class="o">.</span><span class="n">slurm_partition</span><span class="p">,</span>
         <span class="n">gpus_per_node</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
diff --git a/docs/api/api/framework/train/index.html b/docs/api/api/framework/train/index.html
index 6dfae60..d8507f7 100644
--- a/docs/api/api/framework/train/index.html
+++ b/docs/api/api/framework/train/index.html
@@ -3425,9 +3425,7 @@ <h2 id="saev.framework.train.evaluate" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/framework/train.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">508</span>
-<span class="normal">509</span>
-<span class="normal">510</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">510</span>
 <span class="normal">511</span>
 <span class="normal">512</span>
 <span class="normal">513</span>
@@ -3533,7 +3531,9 @@ <h2 id="saev.framework.train.evaluate" class="doc doc-heading">
 <span class="normal">613</span>
 <span class="normal">614</span>
 <span class="normal">615</span>
-<span class="normal">616</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">616</span>
+<span class="normal">617</span>
+<span class="normal">618</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">evaluate</span><span class="p">(</span>
     <span class="n">cfgs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Config</span><span class="p">],</span> <span class="n">saes</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span><span class="p">,</span> <span class="n">objectives</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">ModuleList</span>
@@ -3731,9 +3731,7 @@ <h2 id="saev.framework.train.main" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/framework/train.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">703</span>
-<span class="normal">704</span>
-<span class="normal">705</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">705</span>
 <span class="normal">706</span>
 <span class="normal">707</span>
 <span class="normal">708</span>
@@ -3823,7 +3821,9 @@ <h2 id="saev.framework.train.main" class="doc doc-heading">
 <span class="normal">792</span>
 <span class="normal">793</span>
 <span class="normal">794</span>
-<span class="normal">795</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">795</span>
+<span class="normal">796</span>
+<span class="normal">797</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">main</span><span class="p">(</span>
     <span class="n">cfg</span><span class="p">:</span> <span class="n">tp</span><span class="o">.</span><span class="n">Annotated</span><span class="p">[</span><span class="n">Config</span><span class="p">,</span> <span class="n">tyro</span><span class="o">.</span><span class="n">conf</span><span class="o">.</span><span class="n">arg</span><span class="p">(</span><span class="n">name</span><span class="o">=</span><span class="s2">&quot;&quot;</span><span class="p">)],</span>
     <span class="n">sweep</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span> <span class="o">|</span> <span class="kc">None</span> <span class="o">=</span> <span class="kc">None</span><span class="p">,</span>
@@ -4000,9 +4000,7 @@ <h2 id="saev.framework.train.split_cfgs" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/framework/train.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">667</span>
-<span class="normal">668</span>
-<span class="normal">669</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">669</span>
 <span class="normal">670</span>
 <span class="normal">671</span>
 <span class="normal">672</span>
@@ -4026,7 +4024,9 @@ <h2 id="saev.framework.train.split_cfgs" class="doc doc-heading">
 <span class="normal">690</span>
 <span class="normal">691</span>
 <span class="normal">692</span>
-<span class="normal">693</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">693</span>
+<span class="normal">694</span>
+<span class="normal">695</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">split_cfgs</span><span class="p">(</span><span class="n">cfgs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Config</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="nb">list</span><span class="p">[</span><span class="nb">list</span><span class="p">[</span><span class="n">Config</span><span class="p">]]:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Splits configs into groups that can be parallelized.</span>
@@ -4075,9 +4075,7 @@ <h2 id="saev.framework.train.train" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/framework/train.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">236</span>
-<span class="normal">237</span>
-<span class="normal">238</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">238</span>
 <span class="normal">239</span>
 <span class="normal">240</span>
 <span class="normal">241</span>
@@ -4299,7 +4297,9 @@ <h2 id="saev.framework.train.train" class="doc doc-heading">
 <span class="normal">457</span>
 <span class="normal">458</span>
 <span class="normal">459</span>
-<span class="normal">460</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">460</span>
+<span class="normal">461</span>
+<span class="normal">462</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">train</span><span class="p">(</span>
     <span class="n">cfgs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">Config</span><span class="p">],</span>
 <span class="p">)</span> <span class="o">-&gt;</span> <span class="nb">tuple</span><span class="p">[</span>
diff --git a/docs/api/api/nn/modeling/index.html b/docs/api/api/nn/modeling/index.html
index 7d9c4cd..97623e7 100644
--- a/docs/api/api/nn/modeling/index.html
+++ b/docs/api/api/nn/modeling/index.html
@@ -2836,7 +2836,13 @@ <h2 id="saev.nn.modeling.SparseAutoencoder" class="doc doc-heading">
 <span class="normal">320</span>
 <span class="normal">321</span>
 <span class="normal">322</span>
-<span class="normal">323</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">SparseAutoencoderConfig</span><span class="p">):</span>
+<span class="normal">323</span>
+<span class="normal">324</span>
+<span class="normal">325</span>
+<span class="normal">326</span>
+<span class="normal">327</span>
+<span class="normal">328</span>
+<span class="normal">329</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">SparseAutoencoderConfig</span><span class="p">):</span>
     <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
 
     <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span> <span class="o">=</span> <span class="n">cfg</span>
@@ -2849,7 +2855,13 @@ <h2 id="saev.nn.modeling.SparseAutoencoder" class="doc doc-heading">
 
     <span class="bp">self</span><span class="o">.</span><span class="n">normalize_w_dec</span><span class="p">()</span>
 
-    <span class="c1"># Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.</span>
+    <span class="c1"># Initialize W_enc to the transpose of W_dec.</span>
+    <span class="c1">#</span>
+    <span class="c1"># .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs:</span>
+    <span class="c1">#</span>
+    <span class="c1"># 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values.</span>
+    <span class="c1">#</span>
+    <span class="c1"># 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec.</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">W_enc</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">W_dec</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">T</span><span class="o">.</span><span class="n">clone</span><span class="p">())</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">b_enc</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">cfg</span><span class="o">.</span><span class="n">d_sae</span><span class="p">))</span>
 
@@ -3052,13 +3064,7 @@ <h3 id="saev.nn.modeling.SparseAutoencoder.decode" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">345</span>
-<span class="normal">346</span>
-<span class="normal">347</span>
-<span class="normal">348</span>
-<span class="normal">349</span>
-<span class="normal">350</span>
-<span class="normal">351</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">351</span>
 <span class="normal">352</span>
 <span class="normal">353</span>
 <span class="normal">354</span>
@@ -3110,7 +3116,13 @@ <h3 id="saev.nn.modeling.SparseAutoencoder.decode" class="doc doc-heading">
 <span class="normal">400</span>
 <span class="normal">401</span>
 <span class="normal">402</span>
-<span class="normal">403</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">decode</span><span class="p">(</span>
+<span class="normal">403</span>
+<span class="normal">404</span>
+<span class="normal">405</span>
+<span class="normal">406</span>
+<span class="normal">407</span>
+<span class="normal">408</span>
+<span class="normal">409</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">decode</span><span class="p">(</span>
     <span class="bp">self</span><span class="p">,</span>
     <span class="n">f_x</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;batch d_sae&quot;</span><span class="p">],</span>
     <span class="o">*</span><span class="p">,</span>
@@ -3222,17 +3234,17 @@ <h3 id="saev.nn.modeling.SparseAutoencoder.forward" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">325</span>
-<span class="normal">326</span>
-<span class="normal">327</span>
-<span class="normal">328</span>
-<span class="normal">329</span>
-<span class="normal">330</span>
-<span class="normal">331</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">331</span>
 <span class="normal">332</span>
 <span class="normal">333</span>
 <span class="normal">334</span>
-<span class="normal">335</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;batch d_model&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Output</span><span class="p">:</span>
+<span class="normal">335</span>
+<span class="normal">336</span>
+<span class="normal">337</span>
+<span class="normal">338</span>
+<span class="normal">339</span>
+<span class="normal">340</span>
+<span class="normal">341</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;batch d_model&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Output</span><span class="p">:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</span>
 
@@ -3265,13 +3277,13 @@ <h3 id="saev.nn.modeling.SparseAutoencoder.normalize_w_dec" class="doc doc-headi
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">405</span>
-<span class="normal">406</span>
-<span class="normal">407</span>
-<span class="normal">408</span>
-<span class="normal">409</span>
-<span class="normal">410</span>
-<span class="normal">411</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">411</span>
+<span class="normal">412</span>
+<span class="normal">413</span>
+<span class="normal">414</span>
+<span class="normal">415</span>
+<span class="normal">416</span>
+<span class="normal">417</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">normalize_w_dec</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Set W_dec to unit-norm columns.</span>
@@ -3300,13 +3312,7 @@ <h3 id="saev.nn.modeling.SparseAutoencoder.remove_parallel_grads" class="doc doc
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">413</span>
-<span class="normal">414</span>
-<span class="normal">415</span>
-<span class="normal">416</span>
-<span class="normal">417</span>
-<span class="normal">418</span>
-<span class="normal">419</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">419</span>
 <span class="normal">420</span>
 <span class="normal">421</span>
 <span class="normal">422</span>
@@ -3326,7 +3332,13 @@ <h3 id="saev.nn.modeling.SparseAutoencoder.remove_parallel_grads" class="doc doc
 <span class="normal">436</span>
 <span class="normal">437</span>
 <span class="normal">438</span>
-<span class="normal">439</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
+<span class="normal">439</span>
+<span class="normal">440</span>
+<span class="normal">441</span>
+<span class="normal">442</span>
+<span class="normal">443</span>
+<span class="normal">444</span>
+<span class="normal">445</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">remove_parallel_grads</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Update grads so that they remove the parallel component</span>
@@ -3804,13 +3816,7 @@ <h2 id="saev.nn.modeling.dump" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">542</span>
-<span class="normal">543</span>
-<span class="normal">544</span>
-<span class="normal">545</span>
-<span class="normal">546</span>
-<span class="normal">547</span>
-<span class="normal">548</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">548</span>
 <span class="normal">549</span>
 <span class="normal">550</span>
 <span class="normal">551</span>
@@ -3830,7 +3836,13 @@ <h2 id="saev.nn.modeling.dump" class="doc doc-heading">
 <span class="normal">565</span>
 <span class="normal">566</span>
 <span class="normal">567</span>
-<span class="normal">568</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">568</span>
+<span class="normal">569</span>
+<span class="normal">570</span>
+<span class="normal">571</span>
+<span class="normal">572</span>
+<span class="normal">573</span>
+<span class="normal">574</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">dump</span><span class="p">(</span><span class="n">fpath</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span> <span class="o">|</span> <span class="nb">str</span><span class="p">,</span> <span class="n">sae</span><span class="p">:</span> <span class="n">SparseAutoencoder</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).</span>
@@ -3879,13 +3891,7 @@ <h2 id="saev.nn.modeling.load" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">571</span>
-<span class="normal">572</span>
-<span class="normal">573</span>
-<span class="normal">574</span>
-<span class="normal">575</span>
-<span class="normal">576</span>
-<span class="normal">577</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">577</span>
 <span class="normal">578</span>
 <span class="normal">579</span>
 <span class="normal">580</span>
@@ -3960,7 +3966,13 @@ <h2 id="saev.nn.modeling.load" class="doc doc-heading">
 <span class="normal">649</span>
 <span class="normal">650</span>
 <span class="normal">651</span>
-<span class="normal">652</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">652</span>
+<span class="normal">653</span>
+<span class="normal">654</span>
+<span class="normal">655</span>
+<span class="normal">656</span>
+<span class="normal">657</span>
+<span class="normal">658</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">load</span><span class="p">(</span><span class="n">fpath</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span> <span class="o">|</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s2">&quot;cpu&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparseAutoencoder</span><span class="p">:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Loads a sparse autoencoder from disk.</span>
diff --git a/docs/api/api/nn/saev.nn/index.html b/docs/api/api/nn/saev.nn/index.html
index 2f9ab87..8874d27 100644
--- a/docs/api/api/nn/saev.nn/index.html
+++ b/docs/api/api/nn/saev.nn/index.html
@@ -2250,7 +2250,13 @@ <h2 id="saev.nn.SparseAutoencoder" class="doc doc-heading">
 <span class="normal">320</span>
 <span class="normal">321</span>
 <span class="normal">322</span>
-<span class="normal">323</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">SparseAutoencoderConfig</span><span class="p">):</span>
+<span class="normal">323</span>
+<span class="normal">324</span>
+<span class="normal">325</span>
+<span class="normal">326</span>
+<span class="normal">327</span>
+<span class="normal">328</span>
+<span class="normal">329</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="p">:</span> <span class="n">SparseAutoencoderConfig</span><span class="p">):</span>
     <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
 
     <span class="bp">self</span><span class="o">.</span><span class="n">cfg</span> <span class="o">=</span> <span class="n">cfg</span>
@@ -2263,7 +2269,13 @@ <h2 id="saev.nn.SparseAutoencoder" class="doc doc-heading">
 
     <span class="bp">self</span><span class="o">.</span><span class="n">normalize_w_dec</span><span class="p">()</span>
 
-    <span class="c1"># Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.</span>
+    <span class="c1"># Initialize W_enc to the transpose of W_dec.</span>
+    <span class="c1">#</span>
+    <span class="c1"># .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs:</span>
+    <span class="c1">#</span>
+    <span class="c1"># 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values.</span>
+    <span class="c1">#</span>
+    <span class="c1"># 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec.</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">W_enc</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">W_dec</span><span class="o">.</span><span class="n">data</span><span class="o">.</span><span class="n">T</span><span class="o">.</span><span class="n">clone</span><span class="p">())</span>
     <span class="bp">self</span><span class="o">.</span><span class="n">b_enc</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">nn</span><span class="o">.</span><span class="n">Parameter</span><span class="p">(</span><span class="n">torch</span><span class="o">.</span><span class="n">zeros</span><span class="p">(</span><span class="n">cfg</span><span class="o">.</span><span class="n">d_sae</span><span class="p">))</span>
 
@@ -2466,13 +2478,7 @@ <h3 id="saev.nn.SparseAutoencoder.decode" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">345</span>
-<span class="normal">346</span>
-<span class="normal">347</span>
-<span class="normal">348</span>
-<span class="normal">349</span>
-<span class="normal">350</span>
-<span class="normal">351</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">351</span>
 <span class="normal">352</span>
 <span class="normal">353</span>
 <span class="normal">354</span>
@@ -2524,7 +2530,13 @@ <h3 id="saev.nn.SparseAutoencoder.decode" class="doc doc-heading">
 <span class="normal">400</span>
 <span class="normal">401</span>
 <span class="normal">402</span>
-<span class="normal">403</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">decode</span><span class="p">(</span>
+<span class="normal">403</span>
+<span class="normal">404</span>
+<span class="normal">405</span>
+<span class="normal">406</span>
+<span class="normal">407</span>
+<span class="normal">408</span>
+<span class="normal">409</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">decode</span><span class="p">(</span>
     <span class="bp">self</span><span class="p">,</span>
     <span class="n">f_x</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;batch d_sae&quot;</span><span class="p">],</span>
     <span class="o">*</span><span class="p">,</span>
@@ -2636,17 +2648,17 @@ <h3 id="saev.nn.SparseAutoencoder.forward" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">325</span>
-<span class="normal">326</span>
-<span class="normal">327</span>
-<span class="normal">328</span>
-<span class="normal">329</span>
-<span class="normal">330</span>
-<span class="normal">331</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">331</span>
 <span class="normal">332</span>
 <span class="normal">333</span>
 <span class="normal">334</span>
-<span class="normal">335</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;batch d_model&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Output</span><span class="p">:</span>
+<span class="normal">335</span>
+<span class="normal">336</span>
+<span class="normal">337</span>
+<span class="normal">338</span>
+<span class="normal">339</span>
+<span class="normal">340</span>
+<span class="normal">341</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">Float</span><span class="p">[</span><span class="n">Tensor</span><span class="p">,</span> <span class="s2">&quot;batch d_model&quot;</span><span class="p">])</span> <span class="o">-&gt;</span> <span class="n">Output</span><span class="p">:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</span>
 
@@ -2679,13 +2691,13 @@ <h3 id="saev.nn.SparseAutoencoder.normalize_w_dec" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">405</span>
-<span class="normal">406</span>
-<span class="normal">407</span>
-<span class="normal">408</span>
-<span class="normal">409</span>
-<span class="normal">410</span>
-<span class="normal">411</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">411</span>
+<span class="normal">412</span>
+<span class="normal">413</span>
+<span class="normal">414</span>
+<span class="normal">415</span>
+<span class="normal">416</span>
+<span class="normal">417</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">normalize_w_dec</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Set W_dec to unit-norm columns.</span>
@@ -2714,13 +2726,7 @@ <h3 id="saev.nn.SparseAutoencoder.remove_parallel_grads" class="doc doc-heading"
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">413</span>
-<span class="normal">414</span>
-<span class="normal">415</span>
-<span class="normal">416</span>
-<span class="normal">417</span>
-<span class="normal">418</span>
-<span class="normal">419</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">419</span>
 <span class="normal">420</span>
 <span class="normal">421</span>
 <span class="normal">422</span>
@@ -2740,7 +2746,13 @@ <h3 id="saev.nn.SparseAutoencoder.remove_parallel_grads" class="doc doc-heading"
 <span class="normal">436</span>
 <span class="normal">437</span>
 <span class="normal">438</span>
-<span class="normal">439</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
+<span class="normal">439</span>
+<span class="normal">440</span>
+<span class="normal">441</span>
+<span class="normal">442</span>
+<span class="normal">443</span>
+<span class="normal">444</span>
+<span class="normal">445</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">()</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">remove_parallel_grads</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Update grads so that they remove the parallel component</span>
@@ -3049,13 +3061,7 @@ <h2 id="saev.nn.dump" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">542</span>
-<span class="normal">543</span>
-<span class="normal">544</span>
-<span class="normal">545</span>
-<span class="normal">546</span>
-<span class="normal">547</span>
-<span class="normal">548</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">548</span>
 <span class="normal">549</span>
 <span class="normal">550</span>
 <span class="normal">551</span>
@@ -3075,7 +3081,13 @@ <h2 id="saev.nn.dump" class="doc doc-heading">
 <span class="normal">565</span>
 <span class="normal">566</span>
 <span class="normal">567</span>
-<span class="normal">568</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">568</span>
+<span class="normal">569</span>
+<span class="normal">570</span>
+<span class="normal">571</span>
+<span class="normal">572</span>
+<span class="normal">573</span>
+<span class="normal">574</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">dump</span><span class="p">(</span><span class="n">fpath</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span> <span class="o">|</span> <span class="nb">str</span><span class="p">,</span> <span class="n">sae</span><span class="p">:</span> <span class="n">SparseAutoencoder</span><span class="p">):</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).</span>
@@ -3124,13 +3136,7 @@ <h2 id="saev.nn.load" class="doc doc-heading">
 
             <details class="mkdocstrings-source">
               <summary>Source code in <code>src/saev/nn/modeling.py</code></summary>
-              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">571</span>
-<span class="normal">572</span>
-<span class="normal">573</span>
-<span class="normal">574</span>
-<span class="normal">575</span>
-<span class="normal">576</span>
-<span class="normal">577</span>
+              <div class="highlight"><table class="highlighttable"><tr><td class="linenos"><div class="linenodiv"><pre><span></span><span class="normal">577</span>
 <span class="normal">578</span>
 <span class="normal">579</span>
 <span class="normal">580</span>
@@ -3205,7 +3211,13 @@ <h2 id="saev.nn.load" class="doc doc-heading">
 <span class="normal">649</span>
 <span class="normal">650</span>
 <span class="normal">651</span>
-<span class="normal">652</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
+<span class="normal">652</span>
+<span class="normal">653</span>
+<span class="normal">654</span>
+<span class="normal">655</span>
+<span class="normal">656</span>
+<span class="normal">657</span>
+<span class="normal">658</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="nd">@beartype</span><span class="o">.</span><span class="n">beartype</span>
 <span class="k">def</span><span class="w"> </span><span class="nf">load</span><span class="p">(</span><span class="n">fpath</span><span class="p">:</span> <span class="n">pathlib</span><span class="o">.</span><span class="n">Path</span> <span class="o">|</span> <span class="nb">str</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">device</span><span class="o">=</span><span class="s2">&quot;cpu&quot;</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">SparseAutoencoder</span><span class="p">:</span>
 <span class="w">    </span><span class="sd">&quot;&quot;&quot;</span>
 <span class="sd">    Loads a sparse autoencoder from disk.</span>
diff --git a/docs/api/api/utils/wandb/index.html b/docs/api/api/utils/wandb/index.html
index 688b234..d44bbcc 100644
--- a/docs/api/api/utils/wandb/index.html
+++ b/docs/api/api/utils/wandb/index.html
@@ -2063,7 +2063,12 @@ <h2 id="saev.utils.wandb.ParallelWandbRun" class="doc doc-heading">
 <span class="normal">30</span>
 <span class="normal">31</span>
 <span class="normal">32</span>
-<span class="normal">33</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
+<span class="normal">33</span>
+<span class="normal">34</span>
+<span class="normal">35</span>
+<span class="normal">36</span>
+<span class="normal">37</span>
+<span class="normal">38</span></pre></div></td><td class="code"><div><pre><span></span><code><span class="k">def</span><span class="w"> </span><span class="fm">__init__</span><span class="p">(</span>
     <span class="bp">self</span><span class="p">,</span>
     <span class="n">project</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span>
     <span class="n">cfgs</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">object</span><span class="p">]],</span>
@@ -2080,7 +2085,12 @@ <h2 id="saev.utils.wandb.ParallelWandbRun" class="doc doc-heading">
     <span class="bp">self</span><span class="o">.</span><span class="n">summary_updates</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="nb">object</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
 
     <span class="bp">self</span><span class="o">.</span><span class="n">live_run</span> <span class="o">=</span> <span class="n">wandb</span><span class="o">.</span><span class="n">init</span><span class="p">(</span>
-        <span class="n">project</span><span class="o">=</span><span class="n">project</span><span class="p">,</span> <span class="n">config</span><span class="o">=</span><span class="n">cfg</span><span class="p">,</span> <span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span> <span class="n">tags</span><span class="o">=</span><span class="n">tags</span><span class="p">,</span> <span class="nb">dir</span><span class="o">=</span><span class="nb">dir</span>
+        <span class="n">project</span><span class="o">=</span><span class="n">project</span><span class="p">,</span>
+        <span class="n">config</span><span class="o">=</span><span class="n">cfg</span><span class="p">,</span>
+        <span class="n">mode</span><span class="o">=</span><span class="n">mode</span><span class="p">,</span>
+        <span class="n">tags</span><span class="o">=</span><span class="n">tags</span><span class="p">,</span>
+        <span class="nb">dir</span><span class="o">=</span><span class="nb">dir</span><span class="p">,</span>
+        <span class="n">settings</span><span class="o">=</span><span class="n">wandb</span><span class="o">.</span><span class="n">Settings</span><span class="p">(</span><span class="n">init_timeout</span><span class="o">=</span><span class="mi">300</span><span class="p">),</span>
     <span class="p">)</span>
 
     <span class="bp">self</span><span class="o">.</span><span class="n">metric_queues</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="n">MetricQueue</span><span class="p">]</span> <span class="o">=</span> <span class="p">[[]</span> <span class="k">for</span> <span class="n">_</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">cfgs</span><span class="p">]</span>
diff --git a/docs/api/developers/protocol/index.html b/docs/api/developers/protocol/index.html
index 9762ef2..94d021d 100644
--- a/docs/api/developers/protocol/index.html
+++ b/docs/api/developers/protocol/index.html
@@ -2225,7 +2225,10 @@ <h2 id="3-shard-sizing-maths">3. Shard sizing maths<a class="headerlink" href="#
 examples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))
 
 shape_per_shard = (
-    examples_per_shard, len(layers), tokens_per_ex, d_model,
+    examples_per_shard,
+    len(layers),
+    tokens_per_ex,
+    d_model,
 )
 </code></pre>
 <p><em><code>patches_per_shard</code> is a </em><em>budget</em><em> (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ <code>d_model = 1024</code>.</em></p>
diff --git a/docs/api/search/search_index.json b/docs/api/search/search_index.json
index 4b95a4f..b90e25d 100644
--- a/docs/api/search/search_index.json
+++ b/docs/api/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"<p>saev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.</p>"},{"location":"#installation","title":"Installation","text":"<p>Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.</p> <p>Clone this repository, then from the root directory:</p> <pre><code>uv run scripts/launch.py --help\n</code></pre> <p>This will create a virtual environment and display the help for all the provided framework scripts.</p>"},{"location":"#quick-start","title":"Quick Start","text":"<p>Save some activations to disk:</p> <pre><code>uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n</code></pre> <p>Read the guide for details.</p>"},{"location":"#why-saev","title":"Why saev?","text":"<p>There are plenty of alternative libraries for SAEs:</p> <ul> <li>Overcomplete, primarily developed by Thomas Fel.</li> </ul> <p>However, saev has some benefits:</p> <ol> <li>saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. 
This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.</li> <li>saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.</li> <li>saev includes complete code from preprints in the <code>contrib/</code> directory, along with logbooks describing how the authors used and developed saev.</li> </ol>"},{"location":"api/colors/","title":"saev.colors","text":"<p>Utility color palettes used across saev visualizations.</p>"},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"<code>dict_to_dataclass(data, cls)</code>","text":"<p>Recursively convert a dictionary to a dataclass instance.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -&gt; T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle 
pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n</code></pre>"},{"location":"api/configs/#saev.configs.expand","title":"<code>expand(config)</code>","text":"<p>Expand a nested dict that may contain lists into many dicts.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef expand(config: dict[str, object]) -&gt; Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n</code></pre>"},{"location":"api/configs/#saev.configs.get_non_default_values","title":"<code>get_non_default_values(obj, default_obj)</code>","text":"<p>Recursively find fields that differ from defaults.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -&gt; dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == 
default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n</code></pre>"},{"location":"api/configs/#saev.configs.load_cfgs","title":"<code>load_cfgs(override, *, default, sweep_dcts)</code>","text":"<p>Load a list of configs from a combination of sources.</p> <p>Parameters:</p> Name Type Description Default <code>override</code> <code>T</code> <p>Command-line overridden values.</p> required <code>default</code> <code>T</code> <p>The default values for a config.</p> required <code>sweep_dcts</code> <code>list[dict]</code> <p>A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.</p> required <p>Returns:</p> Type Description <code>tuple[list[T], list[str]]</code> <p>A list of configs and a list of errors.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -&gt; tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. 
Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, errs\n</code></pre>"},{"location":"api/configs/#saev.configs.load_sweep","title":"<code>load_sweep(sweep_fpath)</code>","text":"<p>Load a sweep file and return the list of config dicts.</p> <p>Parameters:</p> Name Type Description Default <code>sweep_fpath</code> <code>Path</code> <p>Path to a Python file with a <code>make_cfgs()</code> function.</p> required <p>Returns:</p> Type 
Description <code>list[dict]</code> <p>List of config dictionaries from <code>make_cfgs()</code>. Returns empty list if any error occurs.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -&gt; list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n</code></pre>"},{"location":"api/disk/","title":"saev.disk","text":"<p>Helpers for sticking with the layout described in disk-layout.md.</p>"},{"location":"api/disk/#saev.disk.Run","title":"<code>Run(run_dir)</code>","text":"<p>Represents an SAE training run and some associated data.</p> <p>Parameters:</p> Name Type Description Default <code>run_dir</code> <code>Path</code> <p>Run directory, should be $SAEV_NFS/saev/runs/. Assumes the run already exists and validates the structure. Use <code>Run.new()</code> to create a new run. 
required Source code in <code>src/saev/disk.py</code> <pre><code>def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) &lt; 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n</code></pre>"},{"location":"api/disk/#saev.disk.Run.ckpt","title":"<code>ckpt</code>  <code>property</code>","text":"<p>Path to the sae.pt checkpoint.</p>"},{"location":"api/disk/#saev.disk.Run.config","title":"<code>config</code>  <code>property</code>","text":"<p>The training run config. 
Not a train.Config object because we don't want to import from train.py.</p>"},{"location":"api/disk/#saev.disk.Run.inference","title":"<code>inference</code>  <code>property</code>","text":"<p>Path to the inference/ directory.</p>"},{"location":"api/disk/#saev.disk.Run.run_id","title":"<code>run_id</code>  <code>property</code>","text":"<p>The run ID, created by wandb.</p>"},{"location":"api/disk/#saev.disk.Run.train_shards","title":"<code>train_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.val_shards","title":"<code>val_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.new","title":"<code>new(run_id, *, train_shards_dir, val_shards_dir, runs_root)</code>  <code>classmethod</code>","text":"<p>Create a new run with directory structure and symlinks.</p> <p>Parameters:</p> Name Type Description Default <code>run_id</code> <code>str</code> <p>The run ID (typically from wandb).</p> required <code>train_shards_dir</code> <code>Path</code> <p>Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/). required <code>val_shards_dir</code> <code>Path</code> <p>Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/). 
required <code>runs_root</code> <code>Path</code> <p>Root directory for runs (typically $SAEV_NFS/saev/runs).</p> required <p>Returns:</p> Type Description <code>Run</code> <p>A new Run instance with all directories and symlinks created.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -&gt; \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n</code></pre>"},{"location":"api/disk/#saev.disk.is_runs_root","title":"<code>is_runs_root(path)</code>","text":"<p>Check if <code>path</code> is a valid runs root directory.</p> <p>A valid runs root ends with <code>saev/runs</code> and exists as a directory.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/runs.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_runs_root(path: 
pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_dir","title":"<code>is_shards_dir(path)</code>","text":"<p>Check if <code>path</code> is a specific shards directory.</p> <p>A valid shards directory ends with <code>saev/shards/&lt;hash&gt;</code> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards/ with required files. Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/&lt;hash&gt;` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/&lt;hash&gt; with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) &lt; 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_root","title":"<code>is_shards_root(path)</code>","text":"<p>Check if <code>path</code> is a valid shards root directory.</p> <p>A valid shards root ends with <code>saev/shards</code> and exists as a directory.</p> <p>Parameters:</p> Name 
Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_root(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n</code></pre>"},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"<code>RemovedFeatureError</code>","text":"<p>               Bases: <code>RuntimeError</code></p> <p>Feature existed before but is no longer supported.</p>"},{"location":"api/helpers/#saev.helpers.batched_idx","title":"<code>batched_idx(total_size, batch_size)</code>","text":"<p>Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.</p> <p>Parameters:</p> Name Type Description Default <code>total_size</code> <code>int</code> <p>total number of examples</p> required <code>batch_size</code> <code>int</code> <p>maximum distance between the generated indices.</p> required <p>Returns:</p> Type Description <p>A generator of (int, int) tuples that can slice up a list or a tensor.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"<code>__iter__()</code>","text":"<p>Yield (start, end) index pairs for batching.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __iter__(self) -&gt; 
Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"<code>__len__()</code>","text":"<p>Return the number of batches.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.progress","title":"<code>progress(it, *, every=10, desc='progress', total=0)</code>","text":"<p>Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.</p> <p>Parameters:</p> Name Type Description Default <code>it</code> <code>Iterable</code> <p>Iterable to wrap.</p> required <code>every</code> <code>int</code> <p>How many iterations between logging progress.</p> <code>10</code> <code>desc</code> <code>str</code> <p>What to name the logger.</p> <code>'progress'</code> <code>total</code> <code>int</code> <p>If non-zero, how long the iterable is.</p> <code>0</code> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n</code></pre>"},{"location":"api/helpers/#saev.helpers.csr_topk","title":"<code>csr_topk(arr, *, k, axis=0, batch_size=1024)</code>","text":"<p>Takes the top k values of a sparse CSR array.</p> <p>We can only iterate efficiently over rows 
because it's a a CSR array.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>csr_array | csr_matrix</code> <p>The CSR array of values with shape (rows, cols).</p> required <code>k</code> <code>int</code> <p>The k in \"top-k\".</p> required <code>axis</code> <code>int</code> <p>The dimension to sort along.</p> <code>0</code> <code>batch_size</code> <code>int</code> <p>How many rows to process at once.</p> <code>1024</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>saev.helpers.NumpyTopK</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -&gt; NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n</code></pre>"},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"<code>current_git_commit()</code>","text":"<p>Best-effort short SHA of the repo containing this file.</p> <p>Returns <code>None</code> when * <code>git</code> executable is missing, * we\u2019re not inside a git repo (e.g. 
installed wheel), * or any git call errors out.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef current_git_commit() -&gt; str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n</code></pre>"},{"location":"api/helpers/#saev.helpers.flattened","title":"<code>flattened(dct, *, sep='.')</code>","text":"<p>Flatten a potentially nested dict to a single-level dict with <code>.</code>-separated keys.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -&gt; dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return 
new\n</code></pre>"},{"location":"api/helpers/#saev.helpers.fssafe","title":"<code>fssafe(s)</code>","text":"<p>Convert a string to be filesystem-safe by replacing special characters.</p> <p>This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.</p> <p>Parameters:</p> Name Type Description Default <code>s</code> <code>str</code> <p>String to make filesystem-safe.</p> required <p>Returns:</p> Type Description <code>str</code> <p>Filesystem-safe version of the string.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef fssafe(s: str) -&gt; str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"&lt;\": \"_\",\n        \"&gt;\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"<code>get_cache_dir()</code>","text":"<p>Get cache directory from environment variables, defaulting to the current working directory (.)</p> <p>Returns:</p> Type Description <code>str</code> <p>A path to a cache 
directory (might not exist yet).</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_cache_dir() -&gt; str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"<code>get_slurm_job_count()</code>","text":"<p>Get the current number of jobs in the queue for the current user.</p> <p>Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_job_count() -&gt; int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"<code>get_slurm_max_array_size()</code>","text":"<p>Get the MaxArraySize configuration from the current Slurm cluster.</p> <p>Returns:</p> Name Type Description <code>int</code> 
<code>int</code> <p>The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_array_size() -&gt; int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. 
Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"<code>get_slurm_max_submit_jobs()</code>","text":"<p>Get the MaxSubmitJobs limit from the current user's QOS.</p> <p>Returns:</p> Name Type Description <code>int</code> <code>int</code> <p>The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_submit_jobs() -&gt; int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n        
    logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.np_topk","title":"<code>np_topk(arr, k, axis=None)</code>","text":"<p>A numpy implementation of torch.topk.</p> <p>Returns the k largest elements along the given axis. If axis is None, the array is flattened first.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>ndarray</code> <p>Input array.</p> required <code>k</code> <code>int</code> <p>Number of top elements to return.</p> required <code>axis</code> <code>int | None</code> <p>Axis along which to find top k elements. If None, flattens array first.</p> <code>None</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>Array of k largest values along the specified axis, sorted in descending order.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -&gt; NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. 
If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis &lt; 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"<code>submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)</code>","text":"<p>Submit jobs in batches to respect Slurm's MaxArraySize limit.</p> <p>Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.</p> <p>Parameters:</p> Name Type Description Default <code>executor</code> <p>A submitit executor (SlurmExecutor or LocalExecutor).</p> required <code>fn</code> <code>Callable</code> <p>Worker function to call for each config.</p> required <code>args_list</code> <code>list</code> <p>List of arguments to pass to fn.</p> required <code>logger</code> <code>Logger | None</code> <p>Optional logger for progress messages.</p> <code>None</code> <code>margin</code> <code>float</code> <p>Fraction of MaxArraySize to use (default 0.8).</p> <code>0.8</code> <p>Yields:</p> Type Description <code>int</code> <p>Tuples of (global_index, result) for successful jobs.</p> <code>object</code> <p>For failed jobs, yields (global_index, None) and logs a warning.</p> Example <pre><code>executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n</code></pre> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -&gt; Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, 
None\n</code></pre>"},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"<code>Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements)</code>  <code>dataclass</code>","text":"<p>Validated reconstruction metrics aggregated over one evaluation corpus.</p> <p>The primary totals are <code>sse_recon</code> (SAE reconstruction SSE) and <code>sse_baseline</code> (mean-baseline SSE). Derived terms are: - <code>normalized_mse = sse_recon / sse_baseline</code> - <code>mse_per_dim = sse_recon / n_elements</code> - <code>mse_per_token = sse_recon / n_tokens</code> - <code>baseline_mse_per_dim = sse_baseline / n_elements</code> - <code>baseline_mse_per_token = sse_baseline / n_tokens</code></p> <p>Size terms are: - <code>n_tokens</code>: number of tokens included in aggregation - <code>d_model</code>: embedding width per token - <code>n_elements = n_tokens * d_model</code></p>"},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"<code>from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model)</code>  <code>classmethod</code>","text":"<p>Construct metrics from aggregate sums and shape information.</p> <p>Parameters:</p> Name Type Description Default <code>sse_recon</code> <code>float</code> <p>Sum of squared reconstruction errors over all selected tokens and dimensions.</p> required <code>sse_baseline</code> <code>float</code> <p>Sum of squared mean-baseline errors over the same tokens and dimensions.</p> required <code>n_tokens</code> <code>int</code> <p>Number of selected tokens in the aggregation set.</p> required <code>d_model</code> <code>int</code> <p>Activation dimension per token.</p> required <p>Returns:</p> Type Description <code>Metrics</code> <p>A validated <code>Metrics</code> object with all derived fields populated.</p> Source code in <code>src/saev/metrics.py</code> 
<pre><code>@classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -&gt; \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens &gt; 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model &gt; 0, msg\n    msg = f\"sse_recon must be &gt;= 0, got {sse_recon}.\"\n    assert sse_recon &gt;= 0.0, msg\n    msg = f\"sse_baseline must be &gt; 0, got {sse_baseline}.\"\n    assert sse_baseline &gt; 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n</code></pre>"},{"location":"api/saev/","title":"saev","text":"<p>saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.</p>"},{"location":"api/summary/","title":"Summary","text":"<ul> <li>saev</li> <li>saev.colors</li> <li>saev.configs</li> <li>saev.data</li> <li>saev.data.bird_mae</li> <li>saev.data.buffers</li> <li>saev.data.clip</li> <li>saev.data.datasets</li> <li>saev.data.dinov2</li> <li>saev.data.dinov3</li> 
<li>saev.data.fake_clip</li> <li>saev.data.indexed</li> <li>saev.data.models</li> <li>saev.data.ordered</li> <li>saev.data.pe</li> <li>saev.data.shards</li> <li>saev.data.shuffled</li> <li>saev.data.siglip</li> <li>saev.data.transforms</li> <li>saev.disk</li> <li>saev.framework</li> <li>saev.framework.inference</li> <li>saev.framework.shards</li> <li>saev.framework.train</li> <li>saev.helpers</li> <li>saev.metrics</li> <li>saev.nn</li> <li>saev.nn.modeling</li> <li>saev.nn.objectives</li> <li>saev.utils</li> <li>saev.utils.monitoring</li> <li>saev.utils.scheduling</li> <li>saev.utils.statistics</li> <li>saev.utils.wandb</li> <li>saev.viz</li> </ul>"},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"<code>load_palette(path)</code>","text":"<p>TODO: docstring.</p> Source code in <code>src/saev/viz.py</code> <pre><code>@beartype.beartype\ndef load_palette(path: pathlib.Path) -&gt; list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                
continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 &lt;= chan &lt;= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n</code></pre>"},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"<code>Encoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Pure PyTorch Bird-MAE backbone (no HF).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, cfg: Config) -&gt; None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. 
Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"<code>PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Image (time x mel) to patch embeddings.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -&gt; None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    
)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"<code>Transformer(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"<code>filter_audio(waveform, sample_rate, patches, *, mode='time')</code>","text":"<p>Filter audio based on SAE patch activations over the log-mel spectrogram.</p> <p>Given a waveform and the SAE 
activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.</p> <p>Parameters:</p> Name Type Description Default <code>waveform</code> <code>Float[Tensor, ' samples']</code> <p>Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.</p> required <code>sample_rate</code> <code>int</code> <p>Audio sample rate in Hz. Should be 32000 for Bird-MAE.</p> required <code>patches</code> <code>Bool[Tensor, ' content_tokens_per_example']</code> <p>Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.</p> required <code>mode</code> <code>Literal['time', 'time+freq']</code> <p>Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.</p> <code>'time'</code> <p>Returns:</p> Type Description <code>Float[Tensor, ' clipped']</code> <p>Filtered audio waveform as a 1D torch tensor.</p> Example <p>waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np)  # [512, 128] waveform = torch.from_numpy(waveform_np)</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -&gt; Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. 
Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        &gt;&gt;&gt; waveform_np, sr = librosa.load(audio_path, sr=32000)\n        &gt;&gt;&gt; mel = bird_mae.transform(waveform_np)  # [512, 128]\n        &gt;&gt;&gt; waveform = torch.from_numpy(waveform_np)\n        &gt;&gt;&gt; # ... run through SAE to get patch_activations [256] ...\n        &gt;&gt;&gt; # ... covert SAE activations to bool with &gt; 0 ...\n        &gt;&gt;&gt; time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        &gt;&gt;&gt; time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() &lt; max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n        
    waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs &gt;= hz_low) &amp; (freqs &lt; hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start &lt; valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n        
    win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start &gt;= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... 
covert SAE activations to bool with &gt; 0 ...","text":"<p>time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")</p>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"<code>transform(waveform)</code>","text":"<p>waveform: 1D tensor [samples] returns: 2D tensor [512, 128] matching HF's feature extractor output</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -&gt; Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples &lt; max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t &lt; BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t &gt; BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, 
BIRDMAE_N_MELS), fb.shape\n\n    return fb\n</code></pre>"},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"<code>ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)</code>","text":"<p>Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -&gt; (x, meta). Random order, each sample delivered once, blocking semantics.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n  
  try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"<code>RingBuffer(slots, shape, dtype)</code>","text":"<p>Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.</p>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"<p>slots  : int           capacity in number of items (tensor rows) shape  : tuple[int]    shape of one item, e.g. 
(batch, dim) dtype  : torch.dtype   tensor dtype</p> <p>put(tensor)  : blocks if full get() -&gt; tensor  : blocks if empty qsize() -&gt; int        advisory size (approximate) close()               frees shared storage (call in the main process)</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots &gt; 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -&gt; Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for 
tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"<code>get()</code>","text":"<p>Return a view of the next item; blocks if the queue is empty.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def get(self) -&gt; torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"<code>put(tensor)</code>","text":"<p>Copy <code>tensor</code> into the next free slot; blocks if the queue is full.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def put(self, tensor: torch.Tensor) -&gt; None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 &lt;&lt; 
64)\n</code></pre>"},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Get patch size for CLIP models.</p>"},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. 
import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"<code>BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).</p> <p>See https://www.kaggle.com/competitions/birdclef-2025/data for more information.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of bird audio samples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"<code>root = pathlib.Path('data/birdclef-2025')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory containing the BirdCLEF 2025 data.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"<code>split = 'train_audio'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which data split to use.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"<code>BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').</p> Source code in 
<code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -&gt; target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        
tp.assert_never(cfg.split)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"<code>n_classes</code>  <code>property</code>","text":"<p>Number of bird species.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"<code>Cifar10(name='uoft-cs/cifar10', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace CIFAR-10.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"<code>name = 'uoft-cs/cifar10'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"<code>root</code>  <code>property</code>","text":"<p>Dummy path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. 
Can be 'train' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"<code>DatasetConfig</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Abstract base class for dataset configurations.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"<code>n_examples</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Number of examples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"<code>root</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"<code>FakeImg(n_examples=10)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"<code>FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Tiny synthetic segmentation dataset for tests.</p> <p>Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which class index is considered background.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"<code>content_tokens_per_example = 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per 
example.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"<code>n_classes = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of segmentation classes.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"<code>n_examples = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of examples.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"<code>FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Synthetic segmentation dataset providing pixel-level segmentation masks.</p> <p>Mimics ImgSegFolderDataset by providing:</p> <ul> <li>image: a dummy RGB PIL image</li> <li>segmentation: a PIL image with pixel-level class labels</li> <li>index, target, label</li> </ul> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"<code>Imagenet(name='ILSVRC/imagenet-1k', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace Imagenet.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. 
Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"<code>name = 'ILSVRC/imagenet-1k'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this..</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"<code>ImgFolder(root=pathlib.Path('./data/split'))</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. 
If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"<code>root = pathlib.Path('./data/split')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored. Can be a glob pattern to match multiple directories.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"<code>ImgFolderDataset(*args, sample_transform=None, **kwargs)</code>","text":"<p>               Bases: <code>ImageFolder</code></p> <p>A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"<code>__getitem__(index)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>index</code> <code>int</code> <p>Index</p> required <p>Returns:</p> Type Description <code>dict[str, object]</code> <p>dict with keys 'data', 'index', 'target' and 'label'.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __getitem__(self, index: int) -&gt; dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if 
self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"<code>ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Background label.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"<code>labels_csv = 'labels.csv'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>CSV file with columns: stem,label1,label2,... First column must be 'stem'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. 
Calculated on the fly by counting image files in root/images/split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"<code>root = pathlib.Path('./data/segdataset')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"<code>split = 'training'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"<code>get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>Gets the dataset for the current experiment; delegates construction to dataset-specific functions.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Config</code> <p>Config for the dataset.</p> required <code>data_tr</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> required <code>mask_tr</code> <p>Transform to be applied to masks.</p> required <code>dict_tr</code> <p>Transform to be applied to the entire sample dict.</p> required <p>Returns:     A dataset that has dictionaries with <code>'data'</code>, <code>'index'</code>, <code>'target'</code>, and <code>'label'</code> keys containing examples.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied 
to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"<code>is_img_seg_dataset(data_cfg)</code>","text":"<p>Check if a dataset 
configuration is for an image segmentation dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data_cfg</code> <code>DatasetConfig</code> <p>Dataset configuration</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if this is an image segmentation dataset that should have labels.bin</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -&gt; bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n</code></pre>"},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"<code>Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None)</code>  <code>dataclass</code>","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"<code>depth = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of transformer blocks.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"<code>device = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Device for tensor operations.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"<code>embed_dim = 768</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Embedding dimension for transformer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"<code>ffn_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in feed-forward network.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"<code>ffn_layer = 'mlp'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Type of feed-forward network layer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"<code>ffn_ratio = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Feed-forward network expansion ratio.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"<code>img_size = 224</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Image width and height in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"<code>in_chans = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of input image channels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"<code>mask_k_bias = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to mask K bias in attention.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"<code>n_storage_tokens = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of storage/register tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"<code>num_heads = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of attention heads.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"<code>patch_size = 16</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of each patch in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"<code>pos_embed_rope_base = 100.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Base frequency for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"<code>pos_embed_rope_dtype = 'bf16'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data type for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"<code>pos_embed_rope_max_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"<code>pos_embed_rope_min_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Minimum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"<code>pos_embed_rope_normalize_coords = 'separate'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Coordinate normalization method for RoPE encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"<code>proj_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in output projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"<code>qkv_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in QKV projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"<code>untie_global_and_local_cls_norm = 
False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use separate norms for global and local CLS tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"<code>PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)</code>","text":"<p>               Bases: <code>Module</code></p> <p>2D image to patch embedding: (B,C,H,W) -&gt; (B,N,D)</p> <p>Parameters:</p> Name Type Description Default <code>img_size</code> <code>int | tuple[int, int]</code> <p>Image size.</p> <code>224</code> <code>patch_size</code> <code>int | tuple[int, int]</code> <p>Patch token size.</p> <code>16</code> <code>in_chans</code> <code>int</code> <p>Number of input image channels.</p> <code>3</code> <code>embed_dim</code> <code>int</code> <p>Number of linear projection output channels.</p> <code>768</code> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -&gt; None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/dinov3.py</code> 
<pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, 
n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n</code></pre>"},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"<p>Fake CLIP model for testing with tiny-open-clip-model.</p> <p>This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Tiny model uses 2x2 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for tiny model (8x8 images).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 
images).\"\"\"\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"<code>Dataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"<code>Transformer</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Protocol defining the interface for all Transformer models.</p>"},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"<code>patch_size</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Patch size in pixels (e.g., 14 or 16).</p>"},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"<code>forward(batch)</code>  <code>abstractmethod</code>","text":"<p>Run forward pass on batch of images.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -&gt; Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"<code>get_residuals()</code>  <code>abstractmethod</code>","text":"<p>Return the list of residual blocks/layers for hook registration.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_residuals(self) -&gt; list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"<code>get_token_i(content_tokens_per_example)</code>  
<code>abstractmethod</code>","text":"<p>Return indices for selecting relevant tokens from activations.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -&gt; slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"<code>make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"<code>make_transforms(ckpt, content_tokens_per_example)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.list_families","title":"<code>list_families()</code>","text":"<p>List all ViT family names.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>def list_families() -&gt; list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n</code></pre>"},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"<code>load_model_cls(family)</code>","text":"<p>Load a transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef load_model_cls(family: str) -&gt; type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n</code></pre>"},{"location":"api/data/models/#saev.data.models.register_family","title":"<code>register_family(cls)</code>","text":"<p>Register a new transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        
logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n</code></pre>"},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"<p>Ordered (sequential) dataloader for activation data.</p> <p>This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.</p> <p>Patch labels are provided if there is a labels.bin file on disk.</p> <p>See the design decisions in src/saev/data/performance.md.</p> Usage <p>cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ...     activations = batch[\"act\"]  # [batch_size, d_model] ...     image_indices = batch[\"example_idx\"]  # [batch_size] ...     patch_indices = batch[\"token_idx\"]  # [batch_size] ...     patch_labels = batch[\"patch_labels\"]  # [batch_size]</p>"},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/pe/","title":"saev.data.pe","text":"<p>Perception Encoder (PE) models from Meta (Bolya et al., 2025).</p> <p>PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Both are available via timm.</p>"},{"location":"api/data/pe/#saev.data.pe.Core","title":"<code>Core(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Core: CLIP-style model for language alignment.</p> <p>Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"<code>Spatial(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) 
- vit_pe_spatial_base_patch16_512.fb (B/16, 512px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"<code>IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"<code>IndexedDataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> <p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels 
were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / 
hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"<code>OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"<code>OrderedDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"<code>ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"<code>ShuffledDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = 
None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n          
  self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"<code>make_ordered_config(shuffled_cfg, **overrides)</code>","text":"<p>Create an <code>OrderedConfig</code> from a <code>ShuffledConfig</code>, with optional overrides.</p> <p>Defaults come from <code>shuffled_cfg</code> for fields present in <code>OrderedConfig</code>, and <code>overrides</code> take precedence. Unknown override fields raise <code>TypeError</code> from the <code>OrderedConfig</code> constructor, mirroring <code>dataclasses.replace</code>.</p> Source code in <code>src/saev/data/__init__.py</code> <pre><code>@beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -&gt; OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n</code></pre>"},{"location":"api/data/shards/","title":"saev.data.shards","text":"<p>Library code for reading and writing sharded activations to disk.</p>"},{"location":"api/data/shards/#saev.data.shards.Index","title":"<code>Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard)</code>  <code>dataclass</code>","text":"<p>Attributes:</p> Name Type Description <code>idx</code> <code>int</code> <p>The index of the activation.</p> <code>example_idx</code> <code>int</code> <p>The index of the original example (image, audio clip etc).</p> <code>content_token_idx</code> 
<code>int</code> <p>The token's index within an example's content. -1 for all special tokens.</p> <code>shard_idx</code> <code>int</code> <p>The shard index.</p> <code>example_idx_in_shard</code> <code>int</code> <p>The example index along the examples axis in a shard.</p> <code>token_idx_in_shard</code> <code>int</code> <p>The token index along the tokens axis in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"<code>IndexMap(md, tokens, layer)</code>","text":"<p>Attributes:</p> Name Type Description <code>md</code> <code>Metadata</code> <p>Metadata</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to load.</p> <code>layer</code> <code>int</code> <p>Which layer to load.</p> <code>layer_idx_lookup</code> <code>dict[int, int]</code> <p>The lookup from a transformer layer to the layer idx in the shard.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n          
  return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"<code>LabelsWriter(shards_dir, md)</code>","text":"<p>LabelsWriter handles writing patch-level segmentation labels to a single binary file.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>The shard directory; $SAEV_SCRATCH/saev/shards/ required <code>md</code> <code>Metadata</code> <p>The Metadata object.</p> required <p>Attributes:</p> Name Type Description <code>labels</code> <code>UInt8[ndarray, 'n_examples n_patches']</code> <p>The integer patch labels.</p> <code>labels_path</code> <code>Path</code> <p>Where the integer patch labels are stored.</p> <code>md</code> <code>Metadata</code> 
<p>The dataset metadata.</p> <code>has_written</code> <code>bool</code> <p>Whether we have written any data to <code>self.labels</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"<code>flush()</code>","text":"<p>Flush the memory-mapped file to disk if anything was written.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def flush(self) -&gt; None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"<code>write_batch(batch_labels, start_idx)</code>","text":"<p>Write a batch of labels to the memory-mapped file.</p> <p>Parameters:</p> Name Type Description Default <code>batch_labels</code> <code>ndarray | Tensor</code> <p>Array of shape (batch_size, content_tokens_per_example) with uint8 dtype</p> required <code>start_idx</code> <code>int</code> <p>Starting index in the global labels array</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n  
  Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size &lt;= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> 
<p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example 
including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"<code>RecordedTransformer(model, content_tokens_per_example, cls_token, layers)</code>","text":"<p>               Bases: <code>Module</code></p> <p>A wrapper around a transformer model that records intermediate layer activations during forward passes.</p> <p>Parameters:</p> Name Type Description Default <code>model</code> <code>Module</code> <p>The transformer model to wrap.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether to record the [CLS] token in addition to content tokens.</p> required <code>layers</code> <code>Sequence[int]</code> <p>Which transformer layers to record activations from.</p> required <p>Attributes:</p> Name Type Description <code>model</code> <code>Module</code> <p>The wrapped transformer model.</p> <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per 
example.</p> <code>cls_token</code> <code>bool</code> <p>Whether the [CLS] token is included in recorded activations.</p> <code>layers</code> <code>Sequence[int]</code> <p>Tuple of layer indices being recorded.</p> <code>token_i</code> <code>slice</code> <p>Token indices to extract from model outputs.</p> <code>logger</code> <p>Logger instance for this recorder.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Shard","title":"<code>Shard(name, n_examples)</code>  <code>dataclass</code>","text":"<p>A single shard entry in shards.json, recording the filename and number of examples.</p> <p>Attributes:</p> Name Type Description <code>name</code> <code>str</code> <p>The filename of the shard (e.g., \"acts000000.bin\").</p> <code>n_examples</code> <code>int</code> <p>Number of examples stored in this shard.</p>"},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"<code>ShardInfo(shards=list())</code>  <code>dataclass</code>","text":"<p>A container for shard metadata as recorded in shards.json.</p> <p>Parameters:</p> Name Type Description Default <code>shards</code> <code>list[Shard]</code> <p>A list of Shard objects.</p> <code>list()</code>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"<code>ShardWriter(shards_root, md)</code>","text":"<p>ShardWriter is a 
stateful object that handles sharded activation writing to disk.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>The $SAEV_SCRATCH/saev/shards path.</p> required <code>md</code> <code>Metadata</code> <p>The Metadata object for these shards.</p> required <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>The  $SAEV_SCRATCH/saev/shards/. <code>shard</code> <code>int</code> <code>acts_path</code> <code>Path</code> <code>acts</code> <code>Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None</code> <code>filled</code> <code>int</code> <code>labels_writer</code> <code>LabelsWriter</code> <p>The LabelsWriter writer.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"<code>__enter__()</code>","text":"<p>Context manager entry.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"<code>__exit__(exc_type, exc_val, exc_tb)</code>","text":"<p>Context manager exit - handle cleanup.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle 
cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"<code>write_batch(activations, start_idx, patch_labels=None)</code>","text":"<p>Write a batch of activations and (optionally) patch labels.</p> <p>Parameters:</p> Name Type Description Default <code>activations</code> <code>Float[Tensor, 'batch n_layers all_patches d_model']</code> <p>Batch of activations to write.</p> required <code>start_idx</code> <code>int</code> <p>Starting index for this batch.</p> required <code>patch_labels</code> <code>UInt8[Tensor, 'batch n_patches'] | None</code> <p>Optional patch labels for segmentation datasets.</p> <code>None</code> Source code in <code>src/saev/data/shards.py</code> <pre><code>def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -&gt; None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx &gt;= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. 
Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit &lt; batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 &lt;= {start_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        assert 0 &lt;= start_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 &lt;= {end_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 &lt;= end_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not 
isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"<code>get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)</code>","text":"<p>Get a dataloader for a default map-style dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data</code> <code>Config</code> <p>Config for the dataset.</p> required <code>batch_size</code> <code>int</code> <p>Batch size.</p> required <code>n_workers</code> <code>int</code> <p>Number of dataloader workers.</p> required <code>data_tr</code> <code>Callable | None</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> <code>None</code> <code>mask_tr</code> <code>Callable | None</code> <p>Transform to be applied to masks.</p> <code>None</code> <code>sample_tr</code> <code>Callable | None</code> <p>Transform to be applied to the entire sample dict.</p> <code>None</code> <p>Returns:</p> Type Description <code>DataLoader</code> <p>A PyTorch Dataloader that yields dictionaries with <code>'data'</code> keys containing data batches, <code>'index'</code> keys containing original dataset indices and <code>'label'</code> keys containing label batches.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -&gt; torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the 
raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers &gt; 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"<code>pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)</code>","text":"<p>Convert pixel-level segmentation to patch-level labels using vectorized operations.</p> <p>Parameters:</p> Name Type Description Default <code>seg</code> <code>Image</code> <p>Pixel-level segmentation mask as PIL Image</p> required <code>n_patches</code> <code>int</code> <p>Total number of patches expected</p> required <code>patch_size</code> <code>int</code> <p>Size of each patch in pixels</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>How to aggregate pixel labels into patch labels</p> <code>MAJORITY</code> <code>bg_label</code> <code>int</code> <p>Background label index</p> <code>0</code> <code>max_classes</code> <code>int</code> <p>Maximum number of classes (for bincount)</p> <code>256</code> <p>Returns:</p> Type Description <code>UInt8[Tensor, ' n_patches']</code> <p>Patch labels as uint8 tensor of shape (n_patches,)</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n   
 seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -&gt; UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -&gt; (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        n_patches, max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        
# Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) &gt; 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"<code>worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>family</code> <code>str</code> <p>Transformer family (dinov2, dinov3, clip, etc).</p> required <code>ckpt</code> <code>str</code> <p>Transformer ckpt (hf-hub:imageomics/bioclip2, etc).</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token.</p> required <code>d_model</code> <code>int</code> <p>Hidden dimension of transformer.</p> required <code>layers</code> <code>list[int]</code> <p>The layers to record activations for.</p> required <code>data</code> <code>Config</code> <p>Config for the particular (image) dataset to load.</p> required <code>batch_size</code> <code>int</code> <p>Batch size for the dataset.</p> required <code>n_workers</code> <code>int</code> <p>Number of workers for loading examples from the dataset.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>Maximum number of tokens per disk shard.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>Optional method 
for aggregating segmentation label pixels.</p> <code>MAJORITY</code> <code>shards_root</code> <code>Path</code> <p>Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.</p> required <code>device</code> <code>str</code> <p>Device for doing the computation.</p> required <p>Returns:</p> Type Description <code>Path</code> <p>Path to the shards directory.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -&gt; pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples from the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n</code></pre>"},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    
self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            
self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"<code>conv2d_to_tokens(x_bchw, conv)</code>","text":"<p>Conv2d then flatten spatial to L, return (B, L, D).</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -&gt; 
b (h w) d\")\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"<code>resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)</code>","text":"<p>Resize image to (w, h) so that:   - w % p == 0, h % p == 0   - (h/p) * (w/p) == N   - Minimizes change in aspect ratio.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -&gt; Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p &lt;= 0 or n &lt;= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist &lt; best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"<code>unfolded_conv2d(x_bchw, conv)</code>","text":"<p>Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. 
Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -&gt; b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n</code></pre>"},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"<p>Script for dumping SAE inference artifacts in a single pass over the dataset.</p> <p>Default mode writes 5 files:</p> <ol> <li>mean_values.pt</li> <li>sparsity.pt</li> <li>distributions.pt</li> <li>token_acts.npz</li> <li>metrics.json</li> </ol> <p>If save=False, only metrics.json is written.</p> <p>metrics.json is serialized from <code>saev.metrics.Metrics</code>.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"<code>Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for computing image 
activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"<code>data = OrderedConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data configuration</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which accelerator to use.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"<code>force_recompute = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Force recomputation even if files exist.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which token labels to ignore when calculating summarized image activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"<code>mem_gb = 80</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"<code>n_dists = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features to save distributions for.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"<code>n_hours = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in 
hours.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"<code>run = pathlib.Path('./runs/abcdefg')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Path to the sae.pt file.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"<code>save = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to write token_acts/statistics files. If False, only metrics.json is written.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. Empty means to not use Slurm.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/inference/#saev.framework.inference.main","title":"<code>main(cfg, sweep=None)</code>","text":"<p>Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config inference.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> Source code in <code>src/saev/framework/inference.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n     
   sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 
0\n</code></pre>"},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"<p>Submitit entrypoint modules for SAE workflows.</p> <p><code>saev.framework</code> is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.</p>"},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"<p>To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.</p> <p>This script handles that additional complexity.</p> <p>Conceptually, activations are either thought of as</p> <ol> <li>A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset</li> <li>Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.</li> </ol>"},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"<code>Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs')</code>  <code>dataclass</code>","text":"<p>Configuration for calculating and saving ViT activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"<code>batch_size = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size for ViT inference.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"<code>ckpt = 'ViT-L-14/openai'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Specific model 
checkpoint.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"<code>cls_token = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the model has a [CLS] token.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"<code>content_tokens_per_example = 256</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per example (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dimension of the ViT activations (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"<code>data = dataclasses.field(default_factory=(datasets.Imagenet))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which dataset to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which device to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"<code>family = 'clip'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which model family.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"<code>layers = dataclasses.field(default_factory=(lambda: [-2]))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which layers to save. 
By default, the second-to-last layer.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"<code>log_to = './logs'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"<code>max_tokens_per_shard = 2400000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"<code>n_workers = 8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloader workers.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"<code>shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to write shards.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"<code>ssl = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use 
SSL.</p>"},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"<code>cli(cfg)</code>","text":"<p>Save ViT activations for use later on.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Configuration for activations.</p> required Source code in <code>src/saev/framework/shards.py</code> <pre><code>@beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        
job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n</code></pre>"},{"location":"api/framework/train/","title":"saev.framework.train","text":"<p>Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.</p> <p>Checklist for making sure your training doesn't suck:</p> <ul> <li>[ ] Data scaling: scale vectors so their average L2 norm is sqrt(n).</li> <li>[ ] Initialize b_e such that each feature activates 10K * d_model / (n * d_sae) of the time, which means that on average, each example activates 10K features.</li> <li>[x] Initialize b_d to 0.</li> <li>[x] Sweep learning rate and sparsity coefficients.</li> <li>[ ] Decay learning rate to 0 over the last 20% of training.</li> <li>[ ] Warmup sparsity over all of training.</li> <li>[x] Gradient clipping (clip at 1 with clip_grad_norm)</li> <li>[x] Track dead latents through training</li> </ul>"},{"location":"api/framework/train/#saev.framework.train.Config","title":"<code>Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for training a sparse autoencoder on a vision transformer.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Hardware 
device.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"<code>grad_clip = 1.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum gradient norm across all SAE parameters.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"<code>log_every = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How often to log to WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"<code>lr = 0.0004</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Learning rate.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"<code>mem_gb = 128</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in hours.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"<code>n_lr_warmup = 500</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of learning rate warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"<code>n_sparsity_warmup = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of sparsity coefficient warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"<code>n_train = 100000000</code>  <code>class-attribute</code> 
<code>instance-attribute</code>","text":"<p>Number of SAE training samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"<code>n_val = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of SAE evaluation samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"<code>objective = nn.objectives.Matryoshka()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE objective configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"<code>optim = 'adam'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Optimizer for training.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"<code>runs_root = pathlib.Path('$SAEV_NFS/saev/runs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory for runs.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"<code>sae = nn.SparseAutoencoderConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"<code>seed = 42</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. 
Empty means to not use Slurm.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"<code>tags = ()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tags to add to WandB run.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"<code>track = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to track with WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"<code>train_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Training data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"<code>val_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Validation data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"<code>wandb_project = 'saev'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>WandB project name.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"<code>EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold)</code>  <code>dataclass</code>","text":"<p>Results of evaluating a trained SAE on a datset.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"<code>almost_dead_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for an \"almost dead\" 
neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"<code>dense_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for a dense neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"<code>freqs</code>  <code>instance-attribute</code>","text":"<p>How often each feature fired.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Mean L0 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Mean L1 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"<code>mean_values</code>  <code>instance-attribute</code>","text":"<p>The mean value for each feature when it did fire.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Mean MSE across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"<code>n_almost_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on fewer than <code>almost_dead_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that never fired on any example.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"<code>n_dense</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on more than <code>dense_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"<code>normalized_mse</code>  
<code>instance-attribute</code>","text":"<p>Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"<code>sse_baseline</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the mean baseline.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"<code>sse_sae</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the SAE.</p>"},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"<code>evaluate(cfgs, saes, objectives)</code>","text":"<p>Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.</p> <p>The metrics computed are mean <code>L0</code>/<code>L1</code>/<code>MSE</code> losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of <code>EvalMetrics</code> is returned, one for each SAE.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -&gt; list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  
A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x &gt; 0, \"batch d_sae -&gt; d_sae\", \"sum\"\n            ).cpu()\n            values[i] += 
einops.reduce(fwd.f_x, \"batch d_sae -&gt; d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens &gt; 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline &gt; 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs &lt; almost_dead_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs &gt; dense_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                
dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.main","title":"<code>main(cfg, sweep=None, max_parallel=None)</code>","text":"<p>Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config for training an SAE.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> <code>max_parallel</code> <code>int | None</code> <p>Maximum SAEs to train concurrently within a single worker.</p> <code>None</code> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 
019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs 
done.\")\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"<code>split_cfgs(cfgs)</code>","text":"<p>Splits configs into groups that can be parallelized.</p> <p>Parameters:</p> Name Type Description Default <code>cfgs</code> <code>list[Config]</code> <p>A list of configs from a sweep file.</p> required <p>Returns:</p> Type Description <code>list[list[Config]]</code> <p>A list of lists, where the configs in each sublist do not differ in any keys that are in <code>CANNOT_PARALLELIZE</code>. This means that each sublist is a valid \"parallel\" set of configs for <code>train</code>.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -&gt; list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. 
This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.train","title":"<code>train(cfgs)</code>","text":"<p>Explicitly declare the optimizer, schedulers, dataloader, etc outside of <code>main</code> so that all the variables are dropped from scope and can be garbage collected.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -&gt; tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n 
   )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            
tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                
metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch &gt; 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse &gt; 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() &gt; 1e-12).sum(0) == 
0).float().mean()\n\n                    # Dictionary coherence: max |&lt;w_i, w_j&gt;| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        \"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, 
scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n</code></pre>"},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"<p>Neural network architectures for sparse autoencoders.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"<code>AuxK(key='auxk', k_aux=512, alpha=1 / 32)</code>  <code>dataclass</code>","text":"<p>AuxK auxiliary reconstruction loss for dead latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"<code>BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero per sample in the batch.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"<code>BatchTopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>BatchTopK activation and inference-time threshold for sparse autoencoders.</p> <p>This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.</p> <p>Training mode (model.train()):     Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. 
This enforces an average of exactly <code>top_k</code> active features per example while allowing the \"activation budget\" to move between examples in the batch.</p> <pre><code>During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n</code></pre> <p>Eval mode (model.eval()):     At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:</p> <pre><code>    y = x if x &gt; theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n</code></pre> Inputs <p>x: Tensor of shape [batch, d_sae] containing pre-activation codes.</p> Outputs <p>Tensor of shape [batch, d_sae] with the same dtype and device as x, where either:     - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or     - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to each sample in the batch.</p> Source code in 
<code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold &lt;= 0:\n            return torch.where(x &gt; 0, x, torch.zeros_like(x))\n\n        return torch.where(x &gt; self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x &gt; 0]\n        if pos.numel() &gt;= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"<code>NoAux(key='no-aux')</code>  <code>dataclass</code>","text":"<p>No auxiliary loss (e.g., for ReLU).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"<code>NoSparsity(key='no-sparsity')</code>  <code>dataclass</code>","text":"<p>No explicit sparsity penalty (e.g. 
for TopK/BatchTopK where k controls sparsity).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"<code>Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux())</code>  <code>dataclass</code>","text":"<p>Vanilla ReLU</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. 
That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    
device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model 
-&gt; d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"<code>TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"<code>TopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Top-K activation function. 
For use as activation function of sparse encoder.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to the input tensor.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": 
SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"<code>Loss()</code>  <code>dataclass</code>","text":"<p>The loss term for an autoencoder training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"<code>Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000)</code>  <code>dataclass</code>","text":"<p>Config for the Matryoshka loss for another arbitrary SAE class.</p> <p>Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"<code>dead_threshold_tokens = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tokens without activation before a latent is considered 
dead.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"<code>n_prefixes = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of random length prefixes to use for loss calculation.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"<code>MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>Loss</code></p> <p>The composite loss terms for an training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"<code>aux</code>  <code>instance-attribute</code>","text":"<p>Auxiliary loss term (e.g., AuxK).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Sum of L0 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Sum of L1 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Average of reconstruction loss (mean squared error) for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of dead latents (per aux loss threshold).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"<code>sparsity</code>  <code>instance-attribute</code>","text":"<p>Sparsity loss, typically lambda * 
L1.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"<code>MatryoshkaObjective(cfg)</code>","text":"<p>               Bases: <code>Objective</code></p> <p>Torch module for calculating the matryoshka loss for an SAE.</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n</code></pre>"},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"<code>sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)</code>","text":"<p>Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)</p> <p>Parameters:</p> Name Type Description Default <code>d_sae</code> <code>int</code> <p>Total number of latent dimensions</p> required <code>n_prefixes</code> <code>int</code> <p>Number of prefixes to sample</p> required <code>min_prefix_length</code> <code>int</code> <p>Minimum length of any prefix</p> <code>1</code> <code>pareto_power</code> <code>float</code> <p>Power parameter for Pareto distribution (lower = more uniform)</p> <code>0.5</code> <p>Returns:</p> Type Description <code>Int64[Tensor, ' n_prefixes']</code> <p>torch.Tensor: Sorted prefix lengths</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>@torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -&gt; Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. 
Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes &lt;= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes &lt;= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n</code></pre>"},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = 
torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec. .clone() is critical: without it, W_enc is a transposed VIEW sharing storage with W_dec. That means load_state_dict overwrites W_dec when it loads W_enc.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths 
for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -&gt; d_sae\",\n    )\n\n    
norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with 
custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"<code>DataloaderMonitor(dataloader, process_factory=None)</code>","text":"<p>Tracks IO and CPU activity for the dataloader manager process and its children.</p> <p>The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call <code>compute()</code> whenever metrics are needed.</p> Source code in <code>src/saev/utils/monitoring.py</code> <pre><code>def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -&gt; None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n</code></pre>"},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"<code>BatchLimiter(dataloader, 
n_samples)</code>","text":"<p>Limits the number of batches to only return <code>n_samples</code> total samples.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"<code>__getattr__(name)</code>","text":"<p>Pass through attribute access to the wrapped dataloader.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __getattr__(self, name: str) -&gt; Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"<code>Warmup(init, final, n_steps)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly increases from <code>init</code> to <code>final</code> over <code>n_steps</code> steps.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"<code>WarmupCosine(init, n_warmup, peak, n_steps, final)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly 
increases from <code>init</code> to <code>peak</code> over <code>n_warmup</code> steps, then decrease down to final using cosine decay over n_steps - n_warmup.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"<code>PercentileEstimator(percentile, total, lr=0.001, shape=())</code>","text":"Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"<code>update(x)</code>","text":"<p>Update the estimator with a new value.</p> <p>This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>float | Tensor</code> <p>The new value to incorporate into the estimation</p> required Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. 
When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"<code>calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)</code>","text":"<p>Compute entropy and coverage metrics for a batch of shuffled indices.</p> <p>The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.</p> Source code in <code>src/saev/utils/statistics.py</code> <pre><code>@beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -&gt; dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples &lt;= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example &lt;= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n  
      raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n</code></pre>"},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"<code>ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')</code>","text":"<p>Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387</p> Source code in <code>src/saev/utils/wandb.py</code> <pre><code>def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project, config=cfg, mode=mode, tags=tags, dir=dir\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n</code></pre>"},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"<pre><code>docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       
# Other markdown pages, images and other files.\n</code></pre>"},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"<p>Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.</p> <p>Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:</p> <ol> <li>Select \\(n\\) random data points from your training data.</li> <li>Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).</li> <li>Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.</li> <li>Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.</li> <li>Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).</li> </ol> <p>Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).</p>"},{"location":"developers/disk-layout/","title":"Storage &amp; Run Manifest Spec (v1)","text":"<p>There are two main locations:</p> <ol> <li><code>$SAEV_SCRATCH/saev/shards</code>: where we store transformer activations (referred to as <code>shards_root</code> in the codebase).</li> <li><code>$SAEV_NFS/saev/runs</code>: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. 
(referred to as <code>runs_root</code> in the codebase).</li> </ol> <p>Visually, these are:</p> <pre><code>$SAEV_SCRATCH/saev/\n  shards/\n    &lt;shard_hash&gt;/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n</code></pre> <p>and</p> <pre><code>$SAEV_NFS/saev/\n  runs/\n    &lt;run_id&gt;/\n      checkpoint/           # output of train.py on &lt;shard_hash&gt;\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/&lt;shard_hash&gt;\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/&lt;shard_hash&gt;\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        &lt;shard_hash&gt;/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n</code></pre> <p>Each <code>$SAEV_SCRATCH/shards/&lt;shard_hash&gt;/</code> MUST include:</p> <ul> <li><code>metadata.json</code> (UTF-8, canonical spec; see <code>protocol.md</code>)</li> <li><code>shards.json</code> (UTF-8, shard index and sizes; see <code>protocol.md</code>)</li> <li><code>acts*.bin</code> (binary shards; format in <code>protocol.md</code>)</li> <li><code>labels.bin</code> (binary patch labels aligned to shards; format in <code>protocol.md</code>)</li> </ul> <p>Note</p> <p>Immutability: Files under <code>saev/shards/&lt;shard_hash&gt;/</code> MUST be treated as read-only after publication. Any change yields a new <code>shard_hash</code>.</p> <p>All CLI entrypoints should accept a single <code>--run &lt;path&gt;</code> argument. 
Every other path MUST be resolved from the run root:</p> <ul> <li>ViT activations: <code>links/shards</code> \u2192 <code>saev/shards/&lt;shard_hash&gt;</code></li> <li>Dataset: <code>links/dataset</code> \u2192 Dataset root, wherever it is on disk.</li> <li>SAE checkpoint: <code>checkpoint/sae.pt</code></li> </ul> <p>Example resolution:</p> <pre><code>run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n</code></pre> <ul> <li><code>$SAEV_SCRATCH</code> and <code>$SAEV_NFS</code> should be set for all users/processes running saev tools.</li> </ul>"},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":"<ul> <li> <p>Where do patch labels live? Next to <code>acts*.bin</code> in <code>$SAEV_SCRATCH/shards/&lt;shard_hash&gt;/labels.bin</code>. Scripts discover them via <code>links/shards/labels.bin</code>.</p> </li> <li> <p>Can I put datasets directly in <code>$SAEV_SCRATCH</code>? Sure, but not in <code>$SAEV_SCRATCH/shards</code>.</p> </li> </ul>"},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"<p>saev caches activations to disk rather than run ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). <code>saev.data</code> has a specific protocol to support this on OSC, a supercomputer center, and take advantage of OSC's specific disk performance. 
</p> <p>Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:</p> <ul> <li>mem-mappable</li> <li>Parameterized solely by the experiment configuration (<code>scripts/shards.py:Config</code>)</li> <li>Referenced by a content-hash, so identical configs collide, divergent ones never do</li> <li>Can be read quickly in a random order for training, and can be read (slowly) with random-access for visuals.</li> </ul> <p>This document is the single normative source. Any divergence in code is a bug.</p>"},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"<pre><code>&lt;dump_to&gt;/&lt;HASH&gt;/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n</code></pre> <p><code>HASH</code> = <code>sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8'))</code> Guards against silent config drift.</p>"},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. <code>metadata.json</code>","text":"field type semantic <code>family</code> string <code>\"clip\" \\| \"siglip\" \\| \"dinov2\"</code> <code>ckpt</code> string model identifier (OpenCLIP, HF, etc.) 
<code>layers</code> int[] ViT residual\u2010block indices recorded <code>patches_per_ex</code> int example patches only (excludes CLS) <code>cls_token</code> bool <code>true</code> -&gt; patch 0 is CLS, else no CLS <code>d_model</code> int activation dimensionality <code>n_examples</code> int total examples in dataset <code>patches_per_shard</code> int logical activations per shard (see #3) <code>data</code> object opaque dataset description <code>dataset</code> string absolute path to original dataset root <code>dtype</code> string numpy dtype. Fixed <code>\"float32\"</code> for now. <code>protocol</code> string <code>\"2.1\"</code> (shards after big refactor) <p>The <code>data</code> object is <code>base64.b64encode(pickle.dumps(img_ds)).decode('utf8')</code>.</p> <p>The <code>dataset</code> field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.</p>"},{"location":"developers/protocol/#22-shardsjson","title":"2.2. <code>shards.json</code>","text":"<p>A single array of <code>shard</code> objects, each of which has the following fields:</p> field type semantic name string shard filename (<code>acts000000.bin</code>). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. 
Shard sizing maths","text":"<pre><code>tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard, len(layers), tokens_per_ex, d_model,\n)\n</code></pre> <p><code>patches_per_shard</code> is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ <code>d_model = 1024</code>.</p> <p>The last shard will have a smaller value for <code>examples_per_shard</code>; this value is documented in <code>n_examples</code> in <code>shards.json</code></p>"},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"<p>The entire dataset of activations is treated as a single logical 4D tensor with the shape <code>(n_examples, len(layers), tokens_per_ex, d_model)</code>. This logical tensor is C-contiguous with axes ordered <code>[Example, Layer, Token, Dimension]</code>.</p> <p>Physically, this tensor is split along the first axis (<code>Example</code>) into multiple shards, where each shard is a single binary file. 
The number of examples in each shard is constant, except for the final shard, which may be smaller.</p> <p>To locate an arbitrary activation vector, a reader must convert a logical coordinate (<code>global_ex_idx</code>, <code>layer_value</code>, <code>token_idx</code>) into a file path and an offset within that file.</p>"},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"<p>Let the parameters from <code>metadata.json</code> be:</p> <ul> <li>L = <code>len(layers)</code></li> <li>P = <code>patches_per_ex</code></li> <li>T = <code>P + (1 if cls_token else 0)</code> (Total tokens per example)</li> <li>D = <code>d_model</code></li> <li>S = <code>n_examples</code> from <code>shards.json</code> or <code>examples_per_shard</code> from Section 3 (shard sizing).</li> </ul>"},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"<p>Given a logical coordinate:</p> <ul> <li><code>global_ex_idx</code>: integer, with <code>0 &lt;= global_ex_idx &lt; n_examples</code></li> <li><code>layer</code>: integer, must be an element of <code>layers</code></li> <li><code>token_idx</code>: integer, <code>0 &lt;= token_idx &lt; T</code></li> </ul> <p>The physical location is found as follows:</p> <ol> <li> <p>Identify Shard:</p> <ul> <li><code>shard_idx = global_ex_idx // S</code></li> <li><code>ex_in_shard = global_ex_idx % S</code> The target file is <code>acts{shard_idx:06d}.bin</code>.</li> </ul> </li> <li> <p>Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical <code>layer_value</code> must be mapped to its index in the stored <code>layers</code> array.</p> <ul> <li><code>layer_idx = layers.index(layer)</code> A reader must raise an error if <code>layer</code> is not in <code>layers</code>.</li> </ul> </li> <li> <p>Calculate Offset: The data within a shard is a 4D tensor of shape <code>(S, L, T, D)</code>. 
The offset to the first byte of the desired activation vector <code>[ex_in_shard, layer_idx , token_idx]</code> is:</p> <ul> <li><code>offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx</code></li> <li><code>offset_in_bytes = offset_in_vectors * D * 4</code> (assuming 4 bytes for <code>float32</code>)</li> </ul> </li> </ol> <p>A reader can then seek to <code>offset_in_bytes</code> and read \\(D \\times 4\\) bytes to retrieve the vector.</p> <p>Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.</p>"},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"<p>The <code>token</code> axis of length \\(T\\) is ordered as follows: * If <code>cls_token</code> is <code>true</code>:     * Index <code>0</code>: [CLS] token activation     * Indices <code>1</code> to \\(P\\): Patch token activations * If <code>cls_token</code> is <code>false</code>:     * Indices <code>0</code> to \\(P-1\\): Patch token activations</p> <p>The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.</p>"},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning &amp; compatibility","text":"<ul> <li>Major changes (shape reorder, dtype switch, new required JSON keys) increment the major protocol version number at the top of this document and must emit a breaking warning in loader code.</li> <li>Minor, backward-compatible additions (new optional JSON key) merely update this doc and the minor protocol version number.</li> </ul> <p>That's it. 
Anything else you find in code that contradicts this document, fix the code or update the spec.</p>"},{"location":"developers/workflows/","title":"Workflows","text":"<ol> <li>Generate inference activations (and thus visuals) for both training and validation splits.</li> </ol>"},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"<p>This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (<code>block.norm2</code>) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.</p>"},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"<p>While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.</p>"},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"<p>First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.</p> <p></p> <p>I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.</p> <p></p>"},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"<p>Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. 
Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.</p> <p></p> <p>My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:</p> <pre><code>bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n</code></pre> <p>Something is broken inside of BirdMAE.</p>"},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"<p>Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.</p> <p></p> <p>BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.</p> <p></p> <p>Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.</p>"},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"<p>Below is the output from the attention layers (Graph #2) in our architecture diagram.</p> <p></p> <p>Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.</p> <p></p> <p>Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. 
Why?</p> <p>Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.</p> <p></p> <p><code>fc2</code> has a <code>weight</code> parameter with shape (4096, 1024) and a <code>bias</code> parameter with shape (1024,). I take the L2 norm of <code>fc2.weight</code>'s columns to see if col 296/1024 is different.</p> <p></p> <p><code>fc2.weight</code> does appear to be different, and abnormally large (note the log scale). <code>fc2.bias</code> is also different, but it's not immediately obvious what's going on there to me.</p>"},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"<p>This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP <code>LayerNorm</code>s learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.</p> <p>We verified this by inspecting <code>norm2.weight</code> across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.</p> <p>The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.</p>"},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"<p>The fix is to record activations after <code>block.norm2</code> (the pre-MLP LayerNorm) instead of from the raw residual stream. 
In <code>saev</code>, this is implemented as:</p> <pre><code>def get_residuals(self) -&gt; list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n</code></pre> <p>After this change, the outlier is suppressed and SAE training works normally.</p>"},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"<ol> <li>Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.</li> <li>Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.</li> <li>Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.</li> </ol>"},{"location":"users/glossary/","title":"Glossary","text":"<p>Definitions for words used in the code and documentation.</p> <ul> <li>example: one dataset item (image, sentence, audio clip, point cloud, graph instance).</li> <li>token: one model position in the encoder\u2019s residual stream (the thing with hidden size <code>d_model</code>). Always \"token\" inside the model.</li> <li>content token: tokens derived from the raw input (image patches, wordpieces, audio windows, nodes, etc.).</li> <li>special token: tokens not directly derived from the raw input (class/summary token, [SEP], [MASK], [PAD], register tokens, etc.).</li> <li>sequence length L: total tokens per example (content + special). If variable, call it \u201cragged\u201d.</li> <li>layer: an integer index into the encoder\u2019s stack.</li> <li>activation kind (optional but useful): which stream you saved (e.g., resid_pre, resid_post, mlp_out, attn_out, qkv, head_out).</li> </ul> <p>Modality-specific vocab:</p> <ul> <li>patch (vision): a 2D content token. 
Often laid out on a grid with shape (H_patches, W_patches).</li> <li>frame/token or tube (video): content token in time \u00d7 space; often (T, H, W).</li> <li>wordpiece / subword (text): content token from a tokenizer.</li> <li>window / frame (audio): time\u2013frequency window.</li> <li>node (graph), point (point cloud).</li> </ul>"},{"location":"users/guide/","title":"Guide","text":"<p>This guide explains how to transition from the ADE20K demo to using <code>saev</code> with your own custom datasets.</p> <p>Here are the steps:</p> <ol> <li>Save ViT activations to disk</li> <li>Train SAEs on activations</li> <li>Evaluate the SAE checkpoints</li> <li>Visualize Learned Features</li> </ol> <p>Note</p> <p><code>saev</code> assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with <code>CUDA_VISIBLE_DEVICES=X</code> to run on GPU X.</p>"},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"<p>To save activations to disk, we need to specify:</p> <ol> <li>Which model we would like to use</li> <li>Which layers we would like to save.</li> <li>Where on disk and how we would like to save activations.</li> <li>Which images we want to save activations for.</li> </ol> <p>The <code>saev/framework/shards.py</code> script does all of this for us.</p> <p>Run <code>uv run launch.py shards --help</code> to see all the configuration.</p> <p>In practice, you might run:</p> <pre><code>uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n</code></pre> <p>This will save activations for the CLIP-pretrained model ViT-B/16, 
which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory <code>/fs/scratch/PAS2136/samuelstevens/saev/shards</code>.</p> <p>Note</p> <p>A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L has 1024 dimensions, and 14x14-pixel patches on a 224x224 image give 256 patches/layer/image), recorded activations will grow even larger.</p> <p>This script will also save a <code>metadata.json</code> file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in <code>.bin</code> files, numbered starting from 000000.</p> <p>To add your own models, see the guide to extending in <code>saev.activations</code>.</p>"},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"<p>To train an SAE, we need to specify:</p> <ol> <li>Which activations to use as input.</li> <li>SAE architectural stuff.</li> <li>Optimization-related stuff.</li> </ol> <p>The <code>train.py</code> script handles this.</p> <p>Run <code>uv run train.py --help</code> to see all the configuration options.</p> <p>The most important options are:</p> <ul> <li><code>--runs-root</code>: where to store runs.</li> <li><code>--train-data</code> and <code>--val-data</code>: How to load the training and validation data. 
You probably want to specify both <code>--{train,val}-data.shards</code> (the shard directory) and <code>--{train,val}-data.layer</code> (which layer to use).</li> <li><code>sae.activation</code>: <code>sae.activation:relu</code> to use the ReLU activation.</li> </ul> <p>This is a full example:</p> <pre><code>uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n</code></pre> <p>This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.</p>"},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"<p>The training loop logs additional loader diagnostics derived from <code>calc_batch_entropy</code> in <code>train.py</code>. Every batch contributes two entropy measurements in natural log units:</p> <ul> <li><code>loader/example_entropy</code> and <code>loader/example_entropy_normalized</code> summarize how evenly the shuffled loader samples example indices. 
Normalization divides the raw entropy by <code>ln(metadata.n_examples)</code> so perfectly uniform sampling is 1.0.</li> <li><code>loader/token_entropy</code> and <code>loader/token_entropy_normalized</code> do the same for patch indices using <code>ln(metadata.content_tokens_per_example)</code> as the normalizer.</li> <li><code>loader/example_coverage</code> and <code>loader/token_coverage</code> report the fraction of distinct example or patch indices seen in the current batch relative to their theoretical support.</li> </ul> <p>All six metrics appear alongside the existing <code>loader/read_mb</code> counters, helping spot skewed sampling or under-covered patches mid-run.</p>"},{"location":"users/guide/#evaluation","title":"Evaluation","text":"<p>After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch <code>torch.nn.Module</code> in combination with a <code>saev.data.OrderedDataLoader</code> or <code>saev.data.IndexedDataset</code>.</p> <p>However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc). The <code>saev/framework/inference.py</code> script calculates these metrics. You can run <code>uv run launch.py inference --help</code> to see all the options.</p> <p>The most important options are:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--data</code>: The options for the OrderedDataLoader. Specifically, you need to set <code>--data.shards</code> and <code>--data.layer</code>, just like for training.</li> </ul> <pre><code>uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n</code></pre>"},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"<p>Now that you've trained an SAE, you probably want to look at its learned features. 
One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse <code>token_acts.npz</code> file from the previous inference step.</p> <p>Warning</p> <p>Because there are so many different ways to visualize SAE features, I moved the visualization code to <code>contrib/trait_discovery</code> (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").</p> <p>The most important options:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--shards</code>: The shards directory.</li> <li><code>--latents</code>: The 0-indexed latents to save images for.</li> <li><code>--n-latents</code>: The number of randomly selected latents to save images for.</li> </ul> <p>So first, move into the <code>contrib/trait_discovery</code> directory:</p> <pre><code>cd contrib/trait_discovery\n</code></pre> <p>Then run the script that generates highlighted images:</p> <pre><code>uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n</code></pre> <p>Note</p> <p>Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (<code>--include-latents</code>). 
I recommend using <code>saev/interactive/metrics.py</code> with marimo to figure out good thresholds.</p>"},{"location":"users/guide/#sweeps","title":"Sweeps","text":"<p>tl;dr: basically the slow part of training SAEs is loading vit activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.</p>"},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"<p>SAE training optimizes for a unique bottleneck compared to typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.</p> <p>A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.</p>"},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"<p>To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:</p> <pre>\nflowchart TD\n    A[Pre-computed ViT Activations] --&gt;|Slow I/O| B[Memory Buffer]\n    B --&gt;|Shared Batch| C[SAE Model 1]\n    B --&gt;|Shared Batch| D[SAE Model 2]\n    B --&gt;|Shared Batch| E[SAE Model 3]\n    B --&gt;|Shared Batch| F[...]\n</pre> <p>This approach:</p> <ul> <li>Loads each batch of activations once from disk</li> <li>Uses that same batch for multiple SAE models with different hyperparameters</li> <li>Amortizes the slow I/O cost across all models in the sweep</li> </ul>"},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"<p>The <code>train</code> command accepts a <code>--sweep</code> parameter that points 
to a TOML file defining the hyperparameter grid:</p> <pre><code>uv run python -m saev train --sweep configs/my_sweep.toml\n</code></pre> <p>Here's an example sweep configuration file:</p> <pre><code>[sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n</code></pre> <p>This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.</p>"},{"location":"users/guide/#limitations","title":"Limitations","text":"<p>Not all parameters can be swept in parallel. Parameters that affect data loading (like <code>batch_size</code> or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.</p>"},{"location":"users/inference/","title":"Inference","text":"<p>If you want to get started quickly, try the inference notebook in marimo or on Google Colab.</p> <p>Briefly, you need to:</p> <ol> <li>Download a checkpoint.</li> <li>Get the code.</li> <li>Load the checkpoint.</li> <li>Get activations.</li> </ol> <p>Details are below.</p>"},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"<p>First, download an SAE checkpoint from the Huggingface collection.</p>"},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"<p>Some repos (CLIP, BioCLIP, DINOv2) contain a single <code>sae.pt</code> at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.</p> <p>You can use <code>wget</code> if you want:</p> <pre><code>wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n</code></pre>"},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"<p>The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. 
Each repo has a <code>manifest.jsonl</code> with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.</p> <p>Download a specific checkpoint:</p> <pre><code>from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n</code></pre> <p>Download all checkpoints in a repo:</p> <pre><code>from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n</code></pre> <p>Available DINOv3 repos:</p> <ul> <li>osunlp/SAE_DINOv3_ViT-S-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-B-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-L-16_IN1K (layers 13-23)</li> <li>osunlp/SAE_DINOv3_TopK_ViT-L-16_IN1K (layers 13-23)</li> </ul>"},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"<p>The easiest way to do this is to clone the code:</p> <pre><code>git clone https://github.com/Imageomics/saev\n</code></pre> <p>You can also install the package from git if you use uv (not sure about pip or cuda):</p> <pre><code>uv add git+https://github.com/Imageomics/saev\n</code></pre> <p>Or clone it and install it as an editable package with pip, like <code>pip install -e .</code> in your virtual environment.</p> <p>Then you can do things like <code>from saev import ...</code>.</p> <p>Note</p> <p>If you struggle to get <code>saev</code> installed, open an issue on GitHub and I will figure out how to make it easier.</p>"},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"<pre><code>import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n</code></pre> <p>Now you have a pretrained SAE.</p>"},{"location":"users/inference/#get-activations","title":"Get Activations","text":"<p>This is the hardest part. 
We need to:</p> <ol> <li>Pass an image into a ViT</li> <li>Record the dense ViT activations at the same layer that the SAE was trained on.</li> <li>Pass the activations into the SAE to get sparse activations.</li> <li>Do something interesting with the sparse SAE activations.</li> </ol> <p>There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the <code>get_sae_latents()</code> functions in both files.</p> <p>Below is example code to do it using the <code>saev</code> package.</p> <pre><code>import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n</code></pre> <p>Now you have the sparse representation of all patches in the image (<code>out.f_x</code>) and the reconstructed activations (<code>out.x_hats</code>).</p> <p>You might select the dimensions with maximal values for each patch and see what other images are maximally activating.</p>"},{"location":"users/new-project/","title":"New Project Structure","text":"<p>saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. 
This is a guide to that process.</p> <p>TL;DR:</p> <ol> <li>Fork saev.</li> <li>Clone your fork.</li> <li>Create a new directory in <code>contrib/</code>.</li> <li>Update both <code>src/saev</code> and your new contrib directory as necessary.</li> <li>(Hopefully) publish.</li> <li>If your changes to <code>src/saev</code> are broadly useful and not overly restrictive, open a PR with your changes to <code>src/saev</code>.</li> </ol> <p>I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.</p> <p>First, fork and clone saev. Do this however you want, but GitHub has a guide on it.</p> <p>Second, you probably want to store code related to your project in this repo. Make a new directory in <code>contrib/</code>. I'm calling my new subproject \"birdsong.\"</p> <pre><code>[I] samuelstevens@host ~/p/saev (main)&gt; tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n</code></pre> <p>Use <code>uv</code> to make a new package inside your new project:</p> <pre><code>[I] samuelstevens@host ~/p/s/c/birdsong (main)&gt; uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n</code></pre> <p>Now you have some additional files.</p> <pre><code>[I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)&gt; tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n</code></pre> <p>Now I can write scripts and source code for birdsong-specific stuff in here. 
I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new <code>birdsong/notebooks</code> directory, and will add <code>birdsong/logbook.md</code> to store ongoing TODO items, and so on.</p> <p>To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit <code>src/saev/data/datasets.py</code>.</p> <p>I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in <code>src/saev/data</code>.</p> <p>If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with <code>birdsong</code> because that's specific to me, rather than to the library.<sup>1</sup></p> <ol> <li> <p>Technically, <code>birdsong</code> will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9</p> </li> </ol>"},{"location":"users/sweeps/","title":"Sweeps","text":"<p>Hyperparameter sweeps in <code>saev</code> train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. 
Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.</p>"},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"<p>Create a Python file defining your sweep:</p> <pre><code># sweeps/my_sweep.py\n\ndef make_cfgs() -&gt; list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Run the sweep:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n</code></pre> <p>This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.</p>"},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"<p>SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. 
By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:</p> <pre><code>\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n</code></pre>"},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"<p>Python sweeps give you full control over config generation. 
Your sweep file must define a <code>make_cfgs()</code> function that returns a list of dicts.</p> <p>Grid search example:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Paired parameters (not a grid):</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 162 configs you'd get from a full grid (3 x 3 x 6 x 6).</p> <p>Conditional sweeps:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae &lt;= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"<p>Command-line arguments override sweep parameters with deep merging. 
The precedence order is: CLI &gt; Sweep &gt; Default.</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n</code></pre> <p>Override nested config fields with dotted notation:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n</code></pre> <p>Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.</p>"},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"<p>Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like <code>train_data</code>, <code>n_train</code>, <code>device</code>) must be identical across all configs in a parallel group.</p> <p>When configs differ in these parameters, they're automatically split into separate Slurm jobs:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]</p> <p>Implementation detail</p> <p>See <code>CANNOT_PARALLELIZE</code> in <code>train.py</code> for the full list of parameters that split parallel groups. 
The <code>split_cfgs()</code> function handles grouping automatically.</p>"},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"<p>Your sweep file is executed as a Python module, so you can use imports and helper functions:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Import mechanics</p> <p>The sweep file is loaded with <code>importlib.import_module()</code>, so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a <code>sweeps/</code> subdirectory).</p>"},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"<p>When running with <code>--slurm-acct</code>, each parallel group becomes a separate Slurm job:</p> <pre><code>uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n</code></pre> <p>The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results</p>"},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"<p>Seeds are automatically incremented for each config to ensure reproducibility:</p> <pre><code># Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n</code></pre> <p>Override the base seed on the command line:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py --seed 
100\n</code></pre>"},{"location":"users/sweeps/#examples","title":"Examples","text":"<p>Simple grid:</p> <pre><code># sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n</code></pre> <p>Layer sweep with paired train/val:</p> <pre><code># sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Architecture sweep:</p> <pre><code># sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"],"fields":{"title":{"boost":1000.0},"text":{"boost":1.0},"tags":{"boost":1000000.0}}},"docs":[{"location":"","title":"saev","text":"<p>saev is a framework for training and evaluating Sparse autoencoders (SAEs) for vision transformers (ViTs), implemented in PyTorch.</p>"},{"location":"#installation","title":"Installation","text":"<p>Installation is supported with uv. saev will likely work with pure pip, conda, etc. but I will not formally support it.</p> <p>Clone this repository, then from the root directory:</p> <pre><code>uv run scripts/launch.py --help\n</code></pre> <p>This will create a virtual environment and display the help for all the provided framework scripts.</p>"},{"location":"#quick-start","title":"Quick Start","text":"<p>Save some activations to disk:</p> <pre><code>uv run scripts/launch.py shards \\\n  --shards-root /$SCRATCH/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-32/openai \\\n  --d-model 768 \\\n  --layers 11 \\\n  --patches-per-ex 49 \\\n  --batch-size 256 \\\n  data:cifar10\n</code></pre> <p>Read the guide for details.</p>"},{"location":"#why-saev","title":"Why saev?","text":"<p>There are plenty of alternative libraries for SAEs:</p> <ul> <li>Overcomplete, primarily developed by Thomas Fel.</li> </ul> <p>However, saev has some benefits:</p> <ol> <li>saev is more of a framework, rather than a library. The reason for this is that SAEs require lots of activations to train a relatively small neural network; while you can implement it with a simple inference loop, efficient training requires some caching on disk. 
This means using saev is a little more like Keras or PyTorch Lightning than Huggingface's Transformers or Datasets libraries.</li> <li>saev offers lots of tools for interacting with sparse autoencoders after training, including interactive notebooks and evaluations.</li> <li>saev includes complete code from preprints in the <code>contrib/</code> directory, along with logbooks describing how the authors used and developed saev.</li> </ol>"},{"location":"api/colors/","title":"saev.colors","text":"<p>Utility color palettes used across saev visualizations.</p>"},{"location":"api/configs/","title":"saev.configs","text":""},{"location":"api/configs/#saev.configs.dict_to_dataclass","title":"<code>dict_to_dataclass(data, cls)</code>","text":"<p>Recursively convert a dictionary to a dataclass instance.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef dict_to_dataclass(data: dict, cls: type[T]) -&gt; T:\n    \"\"\"Recursively convert a dictionary to a dataclass instance.\"\"\"\n    if not dataclasses.is_dataclass(cls):\n        return data\n\n    field_types = {f.name: f.type for f in dataclasses.fields(cls)}\n    kwargs = {}\n\n    for field_name, field_type in field_types.items():\n        if field_name not in data:\n            continue\n\n        value = data[field_name]\n\n        # Handle Optional types\n        origin = tp.get_origin(field_type)\n        args = tp.get_args(field_type)\n\n        # Handle tuple[str, ...]\n        if origin is tuple and args:\n            kwargs[field_name] = tuple(value) if isinstance(value, list) else value\n        # Handle list[DataclassType]\n        elif origin is list and args and dataclasses.is_dataclass(args[0]):\n            kwargs[field_name] = [dict_to_dataclass(item, args[0]) for item in value]\n        # Handle regular dataclass fields\n        elif dataclasses.is_dataclass(field_type):\n            kwargs[field_name] = dict_to_dataclass(value, field_type)\n        # Handle 
pathlib.Path\n        elif field_type is pathlib.Path:\n            # Required Path field - always convert\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is tp.Union and pathlib.Path in args:\n            # Optional Path field (typing.Union style)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        elif origin is types.UnionType and pathlib.Path in args:\n            # Optional Path field (Python 3.10+ union style with |)\n            kwargs[field_name] = pathlib.Path(value) if value is not None else value\n        else:\n            kwargs[field_name] = value\n\n    return cls(**kwargs)\n</code></pre>"},{"location":"api/configs/#saev.configs.expand","title":"<code>expand(config)</code>","text":"<p>Expand a nested dict that may contain lists into many dicts.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef expand(config: dict[str, object]) -&gt; Iterator[dict[str, object]]:\n    \"\"\"Expand a nested dict that may contain lists into many dicts.\"\"\"\n    yield from _expand_discrete(dict(config))\n</code></pre>"},{"location":"api/configs/#saev.configs.get_non_default_values","title":"<code>get_non_default_values(obj, default_obj)</code>","text":"<p>Recursively find fields that differ from defaults.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef get_non_default_values(obj: T, default_obj: T) -&gt; dict:\n    \"\"\"Recursively find fields that differ from defaults.\"\"\"\n    # Check that obj and default_obj are instances of a dataclass.\n    assert dataclasses.is_dataclass(obj) and not isinstance(obj, type)\n    assert dataclasses.is_dataclass(default_obj) and not isinstance(default_obj, type)\n\n    diff = {}\n    for field in dataclasses.fields(obj):\n        obj_value = getattr(obj, field.name)\n        default_value = getattr(default_obj, field.name)\n\n        if obj_value == 
default_value:\n            continue\n\n        # If both are dataclasses of the same type, recurse to find nested differences\n        if (\n            dataclasses.is_dataclass(obj_value)\n            and dataclasses.is_dataclass(default_value)\n            and type(obj_value) is type(default_value)\n        ):\n            nested_diff = get_non_default_values(obj_value, default_value)\n            if nested_diff:\n                diff[field.name] = nested_diff\n        else:\n            # For non-dataclass fields or different types, just record the value\n            diff[field.name] = obj_value\n\n    return diff\n</code></pre>"},{"location":"api/configs/#saev.configs.load_cfgs","title":"<code>load_cfgs(override, *, default, sweep_dcts)</code>","text":"<p>Load a list of configs from a combination of sources.</p> <p>Parameters:</p> Name Type Description Default <code>override</code> <code>T</code> <p>Command-line overridden values.</p> required <code>default</code> <code>T</code> <p>The default values for a config.</p> required <code>sweep_dcts</code> <code>list[dict]</code> <p>A list of dictionaries from Python sweep files. Each dictionary may contain list values that will be expanded.</p> required <p>Returns:</p> Type Description <code>tuple[list[T], list[str]]</code> <p>A list of configs and a list of errors.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_cfgs(\n    override: T, *, default: T, sweep_dcts: list[dict]\n) -&gt; tuple[list[T], list[str]]:\n    \"\"\"\n    Load a list of configs from a combination of sources.\n\n    Args:\n        override: Command-line overridden values.\n        default: The default values for a config.\n        sweep_dcts: A list of dictionaries from Python sweep files. 
Each dictionary may contain list values that will be expanded.\n\n    Returns:\n        A list of configs and a list of errors.\n    \"\"\"\n    # Check that override and default are instances of a dataclass.\n    assert dataclasses.is_dataclass(override) and not isinstance(override, type)\n    assert dataclasses.is_dataclass(default) and not isinstance(default, type)\n\n    # If there's nothing to sweep, return just the override\n    if not sweep_dcts:\n        return [override], []\n\n    # Find which fields were overridden (differ from default)\n    overridden_fields = get_non_default_values(override, default)\n\n    cfgs: list[T] = []\n    errs: list[str] = []\n\n    d = 0  # Global counter for seed incrementing across all expanded configs\n\n    for sweep_dct in sweep_dcts:\n        # Filter out overridden fields from this sweep dict\n        filtered_dct = _filter_overridden_fields(sweep_dct, overridden_fields)\n\n        # If there's nothing to sweep after filtering, just use override\n        if not filtered_dct:\n            cfgs.append(override)\n            d += 1\n            continue\n\n        # Apply the sweep dict to create a config\n        try:\n            updates = _recursive_dataclass_update(override, filtered_dct, override, d)\n\n            if hasattr(override, \"seed\") and \"seed\" not in updates:\n                updates[\"seed\"] = getattr(override, \"seed\", 0) + d\n\n            cfgs.append(dataclasses.replace(override, **updates))\n            d += 1\n        except Exception as err:\n            errs.append(str(err))\n            d += 1\n\n    return cfgs, errs\n</code></pre>"},{"location":"api/configs/#saev.configs.load_sweep","title":"<code>load_sweep(sweep_fpath)</code>","text":"<p>Load a sweep file and return the list of config dicts.</p> <p>Parameters:</p> Name Type Description Default <code>sweep_fpath</code> <code>Path</code> <p>Path to a Python file with a <code>make_cfgs()</code> function.</p> required <p>Returns:</p> Type 
Description <code>list[dict]</code> <p>List of config dictionaries from <code>make_cfgs()</code>. Returns empty list if any error occurs.</p> Source code in <code>src/saev/configs.py</code> <pre><code>@beartype.beartype\ndef load_sweep(sweep_fpath: pathlib.Path) -&gt; list[dict]:\n    \"\"\"\n    Load a sweep file and return the list of config dicts.\n\n    Args:\n        sweep_fpath: Path to a Python file with a `make_cfgs()` function.\n\n    Returns:\n        List of config dictionaries from `make_cfgs()`. Returns empty list if any error occurs.\n    \"\"\"\n    try:\n        namespace = {}\n        exec(sweep_fpath.read_text(), namespace)\n        result = namespace[\"make_cfgs\"]()\n        if not isinstance(result, list):\n            logger.warning(\n                f\"make_cfgs() in {sweep_fpath} returned {type(result).__name__}, expected list\"\n            )\n            return []\n        return result\n    except Exception as err:\n        logger.warning(f\"Failed to load sweep from {sweep_fpath}: {err}\")\n        return []\n</code></pre>"},{"location":"api/disk/","title":"saev.disk","text":"<p>Helpers for sticking with the layout described in disk-layout.md.</p>"},{"location":"api/disk/#saev.disk.Run","title":"<code>Run(run_dir)</code>","text":"<p>Represents an SAE training run and some associated data.</p> <p>Parameters:</p> Name Type Description Default <code>run_dir</code> <code>Path</code> <p>Run directory, should be $SAEV_NFS/saev/runs/. Assumes the run already exists and validates the structure. Use <code>Run.new()</code> to create a new run. 
required Source code in <code>src/saev/disk.py</code> <pre><code>def __init__(self, run_dir: pathlib.Path):\n    self.run_dir = run_dir\n\n    if len(self.run_dir.parts) &lt; 3 or self.run_dir.parts[-3:-1] != (\"saev\", \"runs\"):\n        raise ValueError(\"Run directory is invalid.\")\n\n    if not self.run_dir.exists():\n        raise FileNotFoundError(\n            f\"Run directory does not exist: {self.run_dir}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"checkpoint\").exists():\n        raise FileNotFoundError(\n            f\"Checkpoint directory does not exist: {self.run_dir / 'checkpoint'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"links\").exists():\n        raise FileNotFoundError(\n            f\"Links directory does not exist: {self.run_dir / 'links'}. Use Run.new() to create a new run.\"\n        )\n    if not (self.run_dir / \"inference\").exists():\n        raise FileNotFoundError(\n            f\"Inference directory does not exist: {self.run_dir / 'inference'}. Use Run.new() to create a new run.\"\n        )\n</code></pre>"},{"location":"api/disk/#saev.disk.Run.ckpt","title":"<code>ckpt</code>  <code>property</code>","text":"<p>Path to the sae.pt checkpoint.</p>"},{"location":"api/disk/#saev.disk.Run.config","title":"<code>config</code>  <code>property</code>","text":"<p>The training run config. 
Not a train.Config object because we don't want to import from train.py.</p>"},{"location":"api/disk/#saev.disk.Run.inference","title":"<code>inference</code>  <code>property</code>","text":"<p>Path to the inference/ directory.</p>"},{"location":"api/disk/#saev.disk.Run.run_id","title":"<code>run_id</code>  <code>property</code>","text":"<p>The run ID, created by wandb.</p>"},{"location":"api/disk/#saev.disk.Run.train_shards","title":"<code>train_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.val_shards","title":"<code>val_shards</code>  <code>property</code>","text":"<p>Path to shard root with metadata.json and acts*.bin files.</p>"},{"location":"api/disk/#saev.disk.Run.new","title":"<code>new(run_id, *, train_shards_dir, val_shards_dir, runs_root)</code>  <code>classmethod</code>","text":"<p>Create a new run with directory structure and symlinks.</p> <p>Parameters:</p> Name Type Description Default <code>run_id</code> <code>str</code> <p>The run ID (typically from wandb).</p> required <code>train_shards_dir</code> <code>Path</code> <p>Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/). required <code>val_shards_dir</code> <code>Path</code> <p>Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/). 
required <code>runs_root</code> <code>Path</code> <p>Root directory for runs (typically $SAEV_NFS/saev/runs).</p> required <p>Returns:</p> Type Description <code>Run</code> <p>A new Run instance with all directories and symlinks created.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@classmethod\ndef new(\n    cls,\n    run_id: str,\n    *,\n    train_shards_dir: pathlib.Path,\n    val_shards_dir: pathlib.Path,\n    runs_root: pathlib.Path,\n) -&gt; \"Run\":\n    \"\"\"\n    Create a new run with directory structure and symlinks.\n\n    Args:\n        run_id: The run ID (typically from wandb).\n        train_shards_dir: Absolute path to the train shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        val_shards_dir: Absolute path to the val shards directory (typically $SAEV_SCRATCH/saev/shards/&lt;shard_hash&gt;).\n        runs_root: Root directory for runs (typically $SAEV_NFS/saev/runs).\n\n    Returns:\n        A new Run instance with all directories and symlinks created.\n    \"\"\"\n    run_dir = runs_root / run_id\n    run_dir.mkdir(parents=True)\n    (run_dir / \"checkpoint\").mkdir()\n    (run_dir / \"links\").mkdir()\n    (run_dir / \"inference\").mkdir()\n\n    (run_dir / \"links\" / \"train-shards\").symlink_to(train_shards_dir)\n    (run_dir / \"links\" / \"val-shards\").symlink_to(val_shards_dir)\n\n    return cls(run_dir)\n</code></pre>"},{"location":"api/disk/#saev.disk.is_runs_root","title":"<code>is_runs_root(path)</code>","text":"<p>Check if <code>path</code> is a valid runs root directory.</p> <p>A valid runs root ends with <code>saev/runs</code> and exists as a directory.</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/runs.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_runs_root(path: 
pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid runs root directory.\n\n    A valid runs root ends with `saev/runs` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/runs.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"runs\")\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_dir","title":"<code>is_shards_dir(path)</code>","text":"<p>Check if <code>path</code> is a specific shards directory.</p> <p>A valid shards directory ends with <code>saev/shards/&lt;hash&gt;</code> for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).</p> <p>Parameters:</p> Name Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards/ with required files. Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_dir(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a specific shards directory.\n\n    A valid shards directory ends with `saev/shards/&lt;hash&gt;` for any hash value, exists as a directory, and contains the required files (metadata.json, shards.json, labels.bin).\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards/&lt;hash&gt; with required files.\n    \"\"\"\n    if not path.is_dir():\n        return False\n\n    if len(path.parts) &lt; 3 or path.parts[-3:-1] != (\"saev\", \"shards\"):\n        return False\n\n    return True\n</code></pre>"},{"location":"api/disk/#saev.disk.is_shards_root","title":"<code>is_shards_root(path)</code>","text":"<p>Check if <code>path</code> is a valid shards root directory.</p> <p>A valid shards root ends with <code>saev/shards</code> and exists as a directory.</p> <p>Parameters:</p> Name 
Type Description Default <code>path</code> <code>Path</code> <p>Path to check.</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if path is a directory ending in saev/shards.</p> Source code in <code>src/saev/disk.py</code> <pre><code>@beartype.beartype\ndef is_shards_root(path: pathlib.Path) -&gt; bool:\n    \"\"\"\n    Check if `path` is a valid shards root directory.\n\n    A valid shards root ends with `saev/shards` and exists as a directory.\n\n    Args:\n        path: Path to check.\n\n    Returns:\n        True if path is a directory ending in saev/shards.\n    \"\"\"\n    return path.is_dir() and path.parts[-2:] == (\"saev\", \"shards\")\n</code></pre>"},{"location":"api/helpers/","title":"saev.helpers","text":""},{"location":"api/helpers/#saev.helpers.RemovedFeatureError","title":"<code>RemovedFeatureError</code>","text":"<p>               Bases: <code>RuntimeError</code></p> <p>Feature existed before but is no longer supported.</p>"},{"location":"api/helpers/#saev.helpers.batched_idx","title":"<code>batched_idx(total_size, batch_size)</code>","text":"<p>Iterate over (start, end) indices for total_size examples, where end - start is at most batch_size.</p> <p>Parameters:</p> Name Type Description Default <code>total_size</code> <code>int</code> <p>total number of examples</p> required <code>batch_size</code> <code>int</code> <p>maximum distance between the generated indices.</p> required <p>Returns:</p> Type Description <p>A generator of (int, int) tuples that can slice up a list or a tensor.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(self, total_size: int, batch_size: int):\n    self.total_size = total_size\n    self.batch_size = batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__iter__","title":"<code>__iter__()</code>","text":"<p>Yield (start, end) index pairs for batching.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __iter__(self) -&gt; 
Iterator[tuple[int, int]]:\n    \"\"\"Yield (start, end) index pairs for batching.\"\"\"\n    for start in range(0, self.total_size, self.batch_size):\n        stop = min(start + self.batch_size, self.total_size)\n        yield start, stop\n</code></pre>"},{"location":"api/helpers/#saev.helpers.batched_idx.__len__","title":"<code>__len__()</code>","text":"<p>Return the number of batches.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Return the number of batches.\"\"\"\n    return (self.total_size + self.batch_size - 1) // self.batch_size\n</code></pre>"},{"location":"api/helpers/#saev.helpers.progress","title":"<code>progress(it, *, every=10, desc='progress', total=0)</code>","text":"<p>Wraps an iterable with a logger like tqdm but doesn't use any control codes to manipulate a progress bar, which doesn't work well when your output is redirected to a file. Instead, simple logging statements are used, but it includes quality-of-life features like iteration speed and predicted time to finish.</p> <p>Parameters:</p> Name Type Description Default <code>it</code> <code>Iterable</code> <p>Iterable to wrap.</p> required <code>every</code> <code>int</code> <p>How many iterations between logging progress.</p> <code>10</code> <code>desc</code> <code>str</code> <p>What to name the logger.</p> <code>'progress'</code> <code>total</code> <code>int</code> <p>If non-zero, how long the iterable is.</p> <code>0</code> Source code in <code>src/saev/helpers.py</code> <pre><code>def __init__(\n    self, it: Iterable, *, every: int = 10, desc: str = \"progress\", total: int = 0\n):\n    self.it = it\n    self.every = max(every, 1)\n    self.logger = logging.getLogger(desc)\n    self.total = total\n</code></pre>"},{"location":"api/helpers/#saev.helpers.csr_topk","title":"<code>csr_topk(arr, *, k, axis=0, batch_size=1024)</code>","text":"<p>Takes the top k values of a sparse CSR array.</p> <p>We can only iterate efficiently over rows 
because it's a a CSR array.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>csr_array | csr_matrix</code> <p>The CSR array of values with shape (rows, cols).</p> required <code>k</code> <code>int</code> <p>The k in \"top-k\".</p> required <code>axis</code> <code>int</code> <p>The dimension to sort along.</p> <code>0</code> <code>batch_size</code> <code>int</code> <p>How many rows to process at once.</p> <code>1024</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>saev.helpers.NumpyTopK</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef csr_topk(\n    arr: scipy.sparse.csr_array | scipy.sparse.csr_matrix,\n    *,\n    k: int,\n    axis: int = 0,\n    batch_size: int = 1024,\n) -&gt; NumpyTopK:\n    \"\"\"\n    Takes the top k values of a sparse CSR array.\n\n    We can only iterate efficiently over *rows* because it's a a *CSR* array.\n\n    Args:\n        arr: The CSR array of values with shape (rows, cols).\n        k: The k in \"top-k\".\n        axis: The dimension to sort along.\n        batch_size: How many rows to process at once.\n\n    Returns:\n        saev.helpers.NumpyTopK\n    \"\"\"\n    if axis == 0:\n        return _csr_topk_axis0(arr, k, batch_size)\n    elif axis == 1:\n        return _csr_topk_axis1(arr, k)\n    else:\n        raise ValueError(f\"axis must be 0 or 1, got {axis}\")\n</code></pre>"},{"location":"api/helpers/#saev.helpers.current_git_commit","title":"<code>current_git_commit()</code>","text":"<p>Best-effort short SHA of the repo containing this file.</p> <p>Returns <code>None</code> when * <code>git</code> executable is missing, * we\u2019re not inside a git repo (e.g. 
installed wheel), * or any git call errors out.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef current_git_commit() -&gt; str | None:\n    \"\"\"\n    Best-effort short SHA of the repo containing *this* file.\n\n    Returns `None` when\n    * `git` executable is missing,\n    * we\u2019re not inside a git repo (e.g. installed wheel),\n    * or any git call errors out.\n    \"\"\"\n    try:\n        # Walk up until we either hit a .git dir or the FS root\n        here = pathlib.Path(__file__).resolve()\n        for parent in (here, *here.parents):\n            if (parent / \".git\").exists():\n                break\n        else:  # no .git found\n            return None\n\n        result = subprocess.run(\n            [\"git\", \"-C\", str(parent), \"rev-parse\", \"--short\", \"HEAD\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.DEVNULL,\n            text=True,\n            check=True,\n        )\n        return result.stdout.strip() or None\n    except (FileNotFoundError, subprocess.CalledProcessError):\n        return None\n</code></pre>"},{"location":"api/helpers/#saev.helpers.flattened","title":"<code>flattened(dct, *, sep='.')</code>","text":"<p>Flatten a potentially nested dict to a single-level dict with <code>.</code>-separated keys.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef flattened(\n    dct: dict[str, object], *, sep: str = \".\"\n) -&gt; dict[str, str | int | float | bool | None]:\n    \"\"\"\n    Flatten a potentially nested dict to a single-level dict with `.`-separated keys.\n    \"\"\"\n    new = {}\n    for key, value in dct.items():\n        if isinstance(value, dict):\n            for nested_key, nested_value in flattened(value).items():\n                new[key + \".\" + nested_key] = nested_value\n            continue\n\n        new[key] = value\n\n    return 
new\n</code></pre>"},{"location":"api/helpers/#saev.helpers.fssafe","title":"<code>fssafe(s)</code>","text":"<p>Convert a string to be filesystem-safe by replacing special characters.</p> <p>This is particularly useful for checkpoint names that contain characters like 'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like 'hf-hub_timm_ViT-L-16-SigLIP2-256'.</p> <p>Parameters:</p> Name Type Description Default <code>s</code> <code>str</code> <p>String to make filesystem-safe.</p> required <p>Returns:</p> Type Description <code>str</code> <p>Filesystem-safe version of the string.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef fssafe(s: str) -&gt; str:\n    \"\"\"\n    Convert a string to be filesystem-safe by replacing special characters.\n\n    This is particularly useful for checkpoint names that contain characters like\n    'hf-hub:timm/ViT-L-16-SigLIP2-256' which need to be converted to something like\n    'hf-hub_timm_ViT-L-16-SigLIP2-256'.\n\n    Args:\n        s: String to make filesystem-safe.\n\n    Returns:\n        Filesystem-safe version of the string.\n    \"\"\"\n    # Replace common problematic characters with underscores\n    replacements = {\n        \"/\": \"_\",\n        \"\\\\\": \"_\",\n        \":\": \"_\",\n        \"*\": \"_\",\n        \"?\": \"_\",\n        '\"': \"_\",\n        \"&lt;\": \"_\",\n        \"&gt;\": \"_\",\n        \"|\": \"_\",\n        \" \": \"_\",\n    }\n    for old, new in replacements.items():\n        s = s.replace(old, new)\n    # Remove any remaining non-alphanumeric characters except - _ .\n    return \"\".join(c if c.isalnum() or c in \"-_.\" else \"_\" for c in s)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_cache_dir","title":"<code>get_cache_dir()</code>","text":"<p>Get cache directory from environment variables, defaulting to the current working directory (.)</p> <p>Returns:</p> Type Description <code>str</code> <p>A path to a cache 
directory (might not exist yet).</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_cache_dir() -&gt; str:\n    \"\"\"\n    Get cache directory from environment variables, defaulting to the current working directory (.)\n\n    Returns:\n        A path to a cache directory (might not exist yet).\n    \"\"\"\n    cache_dir = \"\"\n    for var in (\"SAEV_CACHE\", \"HF_HOME\", \"HF_HUB_CACHE\"):\n        cache_dir = cache_dir or os.environ.get(var, \"\")\n    return cache_dir or \".\"\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_job_count","title":"<code>get_slurm_job_count()</code>","text":"<p>Get the current number of jobs in the queue for the current user.</p> <p>Uses squeue's -r flag to properly count job array elements individually. For example, a job array 12345_[0-99] will be counted as 100 jobs.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_job_count() -&gt; int:\n    \"\"\"\n    Get the current number of jobs in the queue for the current user.\n\n    Uses squeue's -r flag to properly count job array elements individually.\n    For example, a job array 12345_[0-99] will be counted as 100 jobs.\n    \"\"\"\n    try:\n        # Use -r to display each array element on its own line\n        result = subprocess.run(\n            [\"squeue\", \"--me\", \"-h\", \"-r\"], capture_output=True, text=True, check=True\n        )\n\n        # Count non-empty lines\n        lines = result.stdout.strip().split(\"\\n\")\n        return len([line for line in lines if line.strip()])\n\n    except (subprocess.SubprocessError, FileNotFoundError):\n        # If we can't check, assume no jobs\n        return 0\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_array_size","title":"<code>get_slurm_max_array_size()</code>","text":"<p>Get the MaxArraySize configuration from the current Slurm cluster.</p> <p>Returns:</p> Name Type Description <code>int</code> 
<code>int</code> <p>The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_array_size() -&gt; int:\n    \"\"\"\n    Get the MaxArraySize configuration from the current Slurm cluster.\n\n    Returns:\n        int: The maximum array size allowed on the cluster. Returns 1000 as fallback if unable to determine.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # Run scontrol command to get config information\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"config\"], capture_output=True, text=True, check=True\n        )\n\n        # Search for MaxArraySize in the output\n        match = re.search(r\"MaxArraySize\\s*=\\s*(\\d+)\", result.stdout)\n        if match:\n            max_array_size = int(match.group(1))\n            logger.info(\"Detected MaxArraySize = %d\", max_array_size)\n            return max_array_size\n        else:\n            logger.warning(\n                \"Could not find MaxArraySize in scontrol output, using default of 1000\"\n            )\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error running scontrol: %s\", e)\n        return 1000  # Safe default\n    except ValueError as e:\n        logger.error(\"Error parsing MaxArraySize: %s\", e)\n        return 1000  # Safe default\n    except FileNotFoundError:\n        logger.warning(\n            \"scontrol command not found. Assuming not in Slurm environment. 
Returning default MaxArraySize=1000.\"\n        )\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.get_slurm_max_submit_jobs","title":"<code>get_slurm_max_submit_jobs()</code>","text":"<p>Get the MaxSubmitJobs limit from the current user's QOS.</p> <p>Returns:</p> Name Type Description <code>int</code> <code>int</code> <p>The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef get_slurm_max_submit_jobs() -&gt; int:\n    \"\"\"\n    Get the MaxSubmitJobs limit from the current user's QOS.\n\n    Returns:\n        int: The maximum number of jobs that can be submitted at once. Returns 1000 as fallback.\n    \"\"\"\n    logger = logging.getLogger(\"helpers.slurm\")\n    try:\n        # First, try to get the QOS from a recent job\n        result = subprocess.run(\n            [\"scontrol\", \"show\", \"job\", \"-o\"],\n            capture_output=True,\n            text=True,\n            check=False,\n        )\n\n        qos_name = None\n        if result.returncode == 0 and result.stdout:\n            # Extract QOS from job info\n            match = re.search(r\"QOS=(\\S+)\", result.stdout)\n            if match:\n                qos_name = match.group(1)\n\n        if not qos_name:\n            # If no jobs, try to get default QOS from association\n            # This is less reliable but better than nothing\n            logger.warning(\"No active jobs to determine QOS, using default of 1000\")\n            return 1000\n\n        # Get the MaxSubmitJobs for this QOS\n        result = subprocess.run(\n            [\"sacctmgr\", \"show\", \"qos\", qos_name, \"format=maxsubmitjobs\", \"-n\", \"-P\"],\n            capture_output=True,\n            text=True,\n            check=True,\n        )\n\n        max_submit = result.stdout.strip()\n        if max_submit and max_submit.isdigit():\n            limit = int(max_submit)\n        
    logger.info(\"Detected MaxSubmitJobs = %d for QOS %s\", limit, qos_name)\n            return limit\n        else:\n            logger.warning(\"Could not parse MaxSubmitJobs, using default of 1000\")\n            return 1000\n\n    except subprocess.SubprocessError as e:\n        logger.error(\"Error getting MaxSubmitJobs: %s\", e)\n        return 1000\n    except (ValueError, FileNotFoundError) as e:\n        logger.error(\"Error: %s\", e)\n        return 1000\n</code></pre>"},{"location":"api/helpers/#saev.helpers.np_topk","title":"<code>np_topk(arr, k, axis=None)</code>","text":"<p>A numpy implementation of torch.topk.</p> <p>Returns the k largest elements along the given axis. If axis is None, the array is flattened first.</p> <p>Parameters:</p> Name Type Description Default <code>arr</code> <code>ndarray</code> <p>Input array.</p> required <code>k</code> <code>int</code> <p>Number of top elements to return.</p> required <code>axis</code> <code>int | None</code> <p>Axis along which to find top k elements. If None, flattens array first.</p> <code>None</code> <p>Returns:</p> Type Description <code>NumpyTopK</code> <p>Array of k largest values along the specified axis, sorted in descending order.</p> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef np_topk(arr: np.ndarray, k: int, axis: int | None = None) -&gt; NumpyTopK:\n    \"\"\"A numpy implementation of torch.topk.\n\n    Returns the k largest elements along the given axis. If axis is None, the array is flattened first.\n\n    Args:\n        arr: Input array.\n        k: Number of top elements to return.\n        axis: Axis along which to find top k elements. 
If None, flattens array first.\n\n    Returns:\n        Array of k largest values along the specified axis, sorted in descending order.\n    \"\"\"\n    if axis is None:\n        arr = arr.flatten()\n        axis = 0\n\n    # Handle negative axis\n    if axis &lt; 0:\n        axis = arr.ndim + axis\n\n    # For each position along other axes, sort and take top k\n    # Use argsort which is stable and will preserve order for equal values\n    sort_indices = np.argsort(-arr, axis=axis, kind=\"stable\")\n\n    # Take the first k sorted indices\n    topk_indices = np.take(sort_indices, np.arange(k), axis=axis)\n\n    # Gather the top k values\n    topk_values = np.take_along_axis(arr, topk_indices, axis=axis)\n\n    return NumpyTopK(values=topk_values, indices=topk_indices)\n</code></pre>"},{"location":"api/helpers/#saev.helpers.submit_job_array","title":"<code>submit_job_array(executor, fn, args_list, *, logger=None, margin=0.8)</code>","text":"<p>Submit jobs in batches to respect Slurm's MaxArraySize limit.</p> <p>Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.</p> <p>Parameters:</p> Name Type Description Default <code>executor</code> <p>A submitit executor (SlurmExecutor or LocalExecutor).</p> required <code>fn</code> <code>Callable</code> <p>Worker function to call for each config.</p> required <code>args_list</code> <code>list</code> <p>List of arguments to pass to fn.</p> required <code>logger</code> <code>Logger | None</code> <p>Optional logger for progress messages.</p> <code>None</code> <code>margin</code> <code>float</code> <p>Fraction of MaxArraySize to use (default 0.8).</p> <code>0.8</code> <p>Yields:</p> Type Description <code>int</code> <p>Tuples of (global_index, result) for successful jobs.</p> <code>object</code> <p>For failed jobs, yields (global_index, None) and logs a warning.</p> Example <pre><code>executor = submitit.SlurmExecutor(folder=\"./logs\")\nexecutor.update_parameters(...)\n\nfor idx, result in submit_job_array(executor, worker_fn, configs):\n    print(f\"Job {idx} returned {result}\")\n</code></pre> Source code in <code>src/saev/helpers.py</code> <pre><code>@beartype.beartype\ndef submit_job_array(\n    executor,\n    fn: tp.Callable,\n    args_list: list,\n    *,\n    logger: logging.Logger | None = None,\n    margin: float = 0.8,\n) -&gt; Iterator[tuple[int, object]]:\n    \"\"\"\n    Submit jobs in batches to respect Slurm's MaxArraySize limit.\n\n    Yields (index, result) tuples as jobs complete. 
Batches are submitted sequentially - each batch must complete before the next is submitted.\n\n    Args:\n        executor: A submitit executor (SlurmExecutor or LocalExecutor).\n        fn: Worker function to call for each config.\n        args_list: List of arguments to pass to fn.\n        logger: Optional logger for progress messages.\n        margin: Fraction of MaxArraySize to use (default 0.8).\n\n    Yields:\n        Tuples of (global_index, result) for successful jobs.\n        For failed jobs, yields (global_index, None) and logs a warning.\n\n    Example:\n        ```\n        executor = submitit.SlurmExecutor(folder=\"./logs\")\n        executor.update_parameters(...)\n\n        for idx, result in submit_job_array(executor, worker_fn, configs):\n            print(f\"Job {idx} returned {result}\")\n        ```\n    \"\"\"\n    from submitit.core.utils import UncompletedJobError\n\n    arr_size = int(get_slurm_max_array_size() * margin)\n    n_total = len(args_list)\n\n    for arr_start, arr_end in batched_idx(n_total, arr_size):\n        batch_args = args_list[arr_start:arr_end]\n\n        if logger:\n            logger.info(\n                \"Submitting batch of %d jobs (%d-%d of %d).\",\n                len(batch_args),\n                arr_start + 1,\n                arr_end,\n                n_total,\n            )\n\n        with executor.batch():\n            jobs = [executor.submit(fn, arg) for arg in batch_args]\n\n        time.sleep(5.0)\n\n        for i, job in enumerate(jobs):\n            global_idx = arr_start + i\n            try:\n                result = job.result()\n                yield global_idx, result\n            except UncompletedJobError:\n                if logger:\n                    logger.warning(\n                        \"Job %s (%d) did not finish.\", job.job_id, global_idx\n                    )\n                yield global_idx, 
None\n</code></pre>"},{"location":"api/metrics/","title":"saev.metrics","text":""},{"location":"api/metrics/#saev.metrics.Metrics","title":"<code>Metrics(mse_per_dim, mse_per_token, normalized_mse, baseline_mse_per_dim, baseline_mse_per_token, sse_recon, sse_baseline, n_tokens, d_model, n_elements)</code>  <code>dataclass</code>","text":"<p>Validated reconstruction metrics aggregated over one evaluation corpus.</p> <p>The primary totals are <code>sse_recon</code> (SAE reconstruction SSE) and <code>sse_baseline</code> (mean-baseline SSE). Derived terms are: - <code>normalized_mse = sse_recon / sse_baseline</code> - <code>mse_per_dim = sse_recon / n_elements</code> - <code>mse_per_token = sse_recon / n_tokens</code> - <code>baseline_mse_per_dim = sse_baseline / n_elements</code> - <code>baseline_mse_per_token = sse_baseline / n_tokens</code></p> <p>Size terms are: - <code>n_tokens</code>: number of tokens included in aggregation - <code>d_model</code>: embedding width per token - <code>n_elements = n_tokens * d_model</code></p>"},{"location":"api/metrics/#saev.metrics.Metrics.from_accumulators","title":"<code>from_accumulators(*, sse_recon, sse_baseline, n_tokens, d_model)</code>  <code>classmethod</code>","text":"<p>Construct metrics from aggregate sums and shape information.</p> <p>Parameters:</p> Name Type Description Default <code>sse_recon</code> <code>float</code> <p>Sum of squared reconstruction errors over all selected tokens and dimensions.</p> required <code>sse_baseline</code> <code>float</code> <p>Sum of squared mean-baseline errors over the same tokens and dimensions.</p> required <code>n_tokens</code> <code>int</code> <p>Number of selected tokens in the aggregation set.</p> required <code>d_model</code> <code>int</code> <p>Activation dimension per token.</p> required <p>Returns:</p> Type Description <code>Metrics</code> <p>A validated <code>Metrics</code> object with all derived fields populated.</p> Source code in <code>src/saev/metrics.py</code> 
<pre><code>@classmethod\ndef from_accumulators(\n    cls, *, sse_recon: float, sse_baseline: float, n_tokens: int, d_model: int\n) -&gt; \"Metrics\":\n    \"\"\"Construct metrics from aggregate sums and shape information.\n\n    Args:\n        sse_recon: Sum of squared reconstruction errors over all selected tokens and dimensions.\n        sse_baseline: Sum of squared mean-baseline errors over the same tokens and dimensions.\n        n_tokens: Number of selected tokens in the aggregation set.\n        d_model: Activation dimension per token.\n\n    Returns:\n        A validated `Metrics` object with all derived fields populated.\n    \"\"\"\n\n    msg = f\"n_tokens must be positive, got {n_tokens}.\"\n    assert n_tokens &gt; 0, msg\n    msg = f\"d_model must be positive, got {d_model}.\"\n    assert d_model &gt; 0, msg\n    msg = f\"sse_recon must be &gt;= 0, got {sse_recon}.\"\n    assert sse_recon &gt;= 0.0, msg\n    msg = f\"sse_baseline must be &gt; 0, got {sse_baseline}.\"\n    assert sse_baseline &gt; 0.0, msg\n\n    n_elements = n_tokens * d_model\n    return cls(\n        mse_per_dim=sse_recon / n_elements,\n        mse_per_token=sse_recon / n_tokens,\n        normalized_mse=sse_recon / sse_baseline,\n        baseline_mse_per_dim=sse_baseline / n_elements,\n        baseline_mse_per_token=sse_baseline / n_tokens,\n        sse_recon=sse_recon,\n        sse_baseline=sse_baseline,\n        n_tokens=n_tokens,\n        d_model=d_model,\n        n_elements=n_elements,\n    )\n</code></pre>"},{"location":"api/saev/","title":"saev","text":"<p>saev is a Python package for training sparse autoencoders (SAEs) on vision transformers (ViTs) in PyTorch.</p>"},{"location":"api/summary/","title":"Summary","text":"<ul> <li>saev</li> <li>saev.colors</li> <li>saev.configs</li> <li>saev.data</li> <li>saev.data.bird_mae</li> <li>saev.data.buffers</li> <li>saev.data.clip</li> <li>saev.data.datasets</li> <li>saev.data.dinov2</li> <li>saev.data.dinov3</li> 
<li>saev.data.fake_clip</li> <li>saev.data.indexed</li> <li>saev.data.models</li> <li>saev.data.ordered</li> <li>saev.data.pe</li> <li>saev.data.shards</li> <li>saev.data.shuffled</li> <li>saev.data.siglip</li> <li>saev.data.transforms</li> <li>saev.disk</li> <li>saev.framework</li> <li>saev.framework.inference</li> <li>saev.framework.shards</li> <li>saev.framework.train</li> <li>saev.helpers</li> <li>saev.metrics</li> <li>saev.nn</li> <li>saev.nn.modeling</li> <li>saev.nn.objectives</li> <li>saev.utils</li> <li>saev.utils.monitoring</li> <li>saev.utils.scheduling</li> <li>saev.utils.statistics</li> <li>saev.utils.wandb</li> <li>saev.viz</li> </ul>"},{"location":"api/viz/","title":"saev.viz","text":""},{"location":"api/viz/#saev.viz.load_palette","title":"<code>load_palette(path)</code>","text":"<p>TODO: docstring.</p> Source code in <code>src/saev/viz.py</code> <pre><code>@beartype.beartype\ndef load_palette(path: pathlib.Path) -&gt; list[tuple[float, float, float]]:\n    \"\"\"TODO: docstring.\"\"\"\n    import glasbey\n\n    palette = []\n\n    for i, line in enumerate(path.read_text().split(\"\\n\")):\n        line = line.strip()\n        if not line:\n            palette.append(None)\n            continue\n\n        palette.append(parse_color(line))\n\n    # Extend the palette using https://glasbey.readthedocs.io/en/latest/extending_palettes.html\n    n_missing = sum(color is None for color in palette)\n    if n_missing:\n        seed_palette = [color for color in palette if color is not None]\n        if seed_palette:\n            extended = glasbey.extend_palette(\n                seed_palette, palette_size=len(seed_palette) + n_missing, as_hex=False\n            )\n            fill_colors = extended[len(seed_palette) :]\n        else:\n            fill_colors = glasbey.create_palette(palette_size=n_missing, as_hex=False)\n\n        fill_iter = iter(fill_colors)\n        for i, color in enumerate(palette):\n            if color is not None:\n                
continue\n            next_color = tuple(float(chan) for chan in next(fill_iter))\n            palette[i] = next_color\n\n    for i, color in enumerate(palette):\n        assert color is not None\n        msg = f\"Color {i} is invalid: {color}\"\n        assert all(0 &lt;= chan &lt;= 1 and isinstance(chan, float) for chan in color), msg\n\n    return palette\n</code></pre>"},{"location":"api/data/bird_mae/","title":"saev.data.bird_mae","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.Encoder","title":"<code>Encoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Pure PyTorch Bird-MAE backbone (no HF).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, cfg: Config) -&gt; None:\n    super().__init__()\n    self.cfg = cfg\n\n    self.patch_embed = PatchEmbed(\n        img_size=(cfg.img_size_x, cfg.img_size_y),\n        patch_size=(cfg.patch_size, cfg.patch_size),\n        in_chans=cfg.in_chans,\n        embed_dim=cfg.embed_dim,\n    )\n\n    self.cls_token = nn.Parameter(torch.zeros(1, 1, cfg.embed_dim))\n    self.pos_embed = nn.Parameter(\n        torch.zeros(1, cfg.n_patches + 1, cfg.embed_dim),\n        requires_grad=cfg.pos_trainable,\n    )\n\n    if self.pos_embed.data.shape[1] == cfg.n_tokens:\n        pos_embed_np = get_2d_sincos_pos_embed_flexible(\n            self.pos_embed.shape[-1],\n            self.patch_embed.patch_hw,\n            cls_token=True,\n        )\n        self.pos_embed.data.copy_(\n            torch.from_numpy(pos_embed_np).float().unsqueeze(0)\n        )\n    else:\n        logger.warning(\n            \"Positional embedding shape mismatch. 
Will not initialize sin-cos pos embed.\"\n        )\n\n    dpr = [x.item() for x in torch.linspace(0, cfg.drop_rate, cfg.depth)]\n    self.blocks = nn.ModuleList([\n        Block(\n            dim=cfg.embed_dim,\n            n_heads=cfg.n_heads,\n            mlp_ratio=cfg.mlp_ratio,\n            qkv_bias=cfg.qkv_bias,\n            qk_norm=cfg.qk_norm,\n            init_values=cfg.init_values,\n            proj_drop=cfg.drop_rate,\n            attn_drop=cfg.drop_rate,\n            drop_path=dpr[i],\n            norm_layer=functools.partial(nn.LayerNorm, eps=cfg.norm_layer_eps),\n        )\n        for i in range(cfg.depth)\n    ])\n\n    self.pos_drop = nn.Dropout(p=cfg.drop_rate)\n    self.norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n    self.fc_norm = nn.LayerNorm(cfg.embed_dim, eps=cfg.norm_layer_eps)\n\n    nn.init.trunc_normal_(self.cls_token, std=0.02)\n    self.apply(self._init_weights)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.PatchEmbed","title":"<code>PatchEmbed(img_size=(512, 128), patch_size=(16, 16), in_chans=1, embed_dim=768)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Image (time x mel) to patch embeddings.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(\n    self,\n    img_size: tuple[int, int] = (512, 128),\n    patch_size: tuple[int, int] = (16, 16),\n    in_chans: int = 1,\n    embed_dim: int = 768,\n) -&gt; None:\n    super().__init__()\n    img_size = _ntuple(2)(img_size)\n    patch_size = _ntuple(2)(patch_size)\n    n_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])\n    self.patch_hw = (img_size[1] // patch_size[1], img_size[0] // patch_size[0])\n    self.img_size = img_size\n    self.patch_size = patch_size\n    self.n_patches = n_patches\n\n    self.proj = nn.Conv2d(\n        in_chans,\n        embed_dim,\n        kernel_size=patch_size,\n        stride=patch_size,\n    
)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer","title":"<code>Transformer(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self.model = load(ckpt)\n\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(ckpt.lower())\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization.</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization.\"\"\"\n    raise NotImplementedError(\"Bird-MAE uses audio spectrograms, not images.\")\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.Transformer.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n    return transform, None\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio","title":"<code>filter_audio(waveform, sample_rate, patches, *, mode='time')</code>","text":"<p>Filter audio based on SAE patch activations over the log-mel spectrogram.</p> <p>Given a waveform and the SAE 
activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.</p> <p>Parameters:</p> Name Type Description Default <code>waveform</code> <code>Float[Tensor, ' samples']</code> <p>Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.</p> required <code>sample_rate</code> <code>int</code> <p>Audio sample rate in Hz. Should be 32000 for Bird-MAE.</p> required <code>patches</code> <code>Bool[Tensor, ' content_tokens_per_example']</code> <p>Boolean SAE activation values per patch, shape [256]. Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.</p> required <code>mode</code> <code>Literal['time', 'time+freq']</code> <p>Filtering mode. - \"time\": Clip to time segments with high activations (preserves all frequencies). - \"time+freq\": Clip time AND apply frequency masking via STFT.</p> <code>'time'</code> <p>Returns:</p> Type Description <code>Float[Tensor, ' clipped']</code> <p>Filtered audio waveform as a 1D torch tensor.</p> Example <p>waveform_np, sr = librosa.load(audio_path, sr=32000) mel = bird_mae.transform(waveform_np)  # [512, 128] waveform = torch.from_numpy(waveform_np)</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef filter_audio(\n    waveform: Float[Tensor, \" samples\"],\n    sample_rate: int,\n    patches: Bool[Tensor, \" content_tokens_per_example\"],\n    *,\n    mode: tp.Literal[\"time\", \"time+freq\"] = \"time\",\n) -&gt; Float[Tensor, \" clipped\"]:\n    \"\"\"\n    Filter audio based on SAE patch activations over the log-mel spectrogram.\n\n    Given a waveform and the SAE activation values for each spectrogram patch, this function extracts audio segments corresponding to highly-activated patches.\n\n    Args:\n        waveform: Raw audio samples, shape [samples]. Should be 5 seconds at 32kHz.\n        sample_rate: Audio sample rate in Hz. 
Should be 32000 for Bird-MAE.\n        patches: Boolean SAE activation values per patch, shape [256].\n            Patches are indexed in row-major order: patch i corresponds to time_patch = i // 8, mel_patch = i % 8.\n        mode: Filtering mode.\n            - \"time\": Clip to time segments with high activations (preserves all frequencies).\n            - \"time+freq\": Clip time AND apply frequency masking via STFT.\n\n    Returns:\n        Filtered audio waveform as a 1D torch tensor.\n\n    Example:\n        &gt;&gt;&gt; waveform_np, sr = librosa.load(audio_path, sr=32000)\n        &gt;&gt;&gt; mel = bird_mae.transform(waveform_np)  # [512, 128]\n        &gt;&gt;&gt; waveform = torch.from_numpy(waveform_np)\n        &gt;&gt;&gt; # ... run through SAE to get patch_activations [256] ...\n        &gt;&gt;&gt; # ... covert SAE activations to bool with &gt; 0 ...\n        &gt;&gt;&gt; time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\")\n        &gt;&gt;&gt; time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")\n    \"\"\"\n    msg = f\"Bird-MAE expects sample_rate={BIRDMAE_SR_HZ}, got {sample_rate}.\"\n    assert sample_rate == BIRDMAE_SR_HZ, msg\n    assert patches.shape == (BIRDMAE_N_TIME_PATCHES * BIRDMAE_N_MEL_PATCHES,)\n    assert waveform.ndim == 1, waveform.shape\n\n    # Match transform(): pad/truncate to exactly 5s\n    waveform_t = waveform.to(torch.float32)\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if waveform_t.numel() &lt; max_len:\n        pad = max_len - waveform_t.numel()\n        waveform_t = F.pad(waveform_t, (0, pad))\n    else:\n        waveform_t = waveform_t[:max_len]\n    if mode == \"time+freq\":\n        # STFT parameters matching Kaldi/BirdMAE assumptions approximately\n        n_fft = BIRDMAE_STFT_N_FFT\n        hop_length = BIRDMAE_STFT_HOP_LENGTH\n        win_length = BIRDMAE_STFT_WIN_LENGTH\n        window = torch.hann_window(win_length)\n\n        stft = torch.stft(\n        
    waveform_t,\n            n_fft=n_fft,\n            hop_length=hop_length,\n            win_length=win_length,\n            window=window,\n            center=True,\n            return_complex=True,\n        )\n        # stft shape: [freq_bins, time_frames]\n        # freq_bins = 513\n        # time_frames ~ 498 for 160000 samples\n\n        freqs = torch.linspace(0, sample_rate / 2, stft.shape[0])\n        mask = torch.zeros_like(stft, dtype=torch.bool)\n\n        # Mel range\n        low_freq = BIRDMAE_STFT_LOW_FREQ_HZ\n        high_freq = sample_rate / 2\n        min_mel = hz_to_mel(low_freq)\n        max_mel = hz_to_mel(high_freq)\n        mel_range = max_mel - min_mel\n\n        active_patch_i = torch.nonzero(patches, as_tuple=False).flatten().tolist()\n        for i in active_patch_i:\n            time_idx = i // BIRDMAE_N_MEL_PATCHES\n            mel_idx = i % BIRDMAE_N_MEL_PATCHES\n\n            # Time range (frames)\n            t_start = time_idx * BIRDMAE_FRAMES_PER_PATCH\n            t_end = (time_idx + 1) * BIRDMAE_FRAMES_PER_PATCH\n\n            # Frequency range (Hz)\n            # 128 mel bins total, 16 bins per patch\n            p_mel_low = (\n                min_mel\n                + (mel_idx * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n            p_mel_high = (\n                min_mel\n                + ((mel_idx + 1) * BIRDMAE_MELS_PER_PATCH / BIRDMAE_N_MELS) * mel_range\n            )\n\n            hz_low = mel_to_hz(p_mel_low)\n            hz_high = mel_to_hz(p_mel_high)\n\n            freq_mask = (freqs &gt;= hz_low) &amp; (freqs &lt; hz_high)\n\n            # Apply mask to valid frames\n            valid_t_end = min(t_end, stft.shape[1])\n            if t_start &lt; valid_t_end:\n                mask[freq_mask, t_start:valid_t_end] = True\n\n        stft_filtered = stft * mask\n        waveform_t = torch.istft(\n            stft_filtered,\n            n_fft=n_fft,\n            hop_length=hop_length,\n        
    win_length=win_length,\n            window=window,\n            center=True,\n            length=waveform_t.shape[0],\n        )\n\n    # Time clipping (applies to both modes)\n    active_time_indices = torch.unique(\n        torch.nonzero(patches, as_tuple=False).flatten() // BIRDMAE_N_MEL_PATCHES\n    ).tolist()\n    segments = []\n\n    for t in active_time_indices:\n        start = t * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        end = (t + 1) * BIRDMAE_SAMPLES_PER_TIME_PATCH\n        if start &gt;= waveform_t.shape[0]:\n            continue\n        seg = waveform_t[start : min(end, waveform_t.shape[0])]\n        segments.append(seg)\n\n    if not segments:\n        return waveform_t[:0]\n\n    return torch.cat(segments, dim=0)\n</code></pre>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--run-through-sae-to-get-patch_activations-256","title":"... run through SAE to get patch_activations [256] ...","text":""},{"location":"api/data/bird_mae/#saev.data.bird_mae.filter_audio--covert-sae-activations-to-bool-with-0","title":"... 
covert SAE activations to bool with &gt; 0 ...","text":"<p>time_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time\") time_freq_clip = bird_mae.filter_audio(waveform, sr, patches, mode=\"time+freq\")</p>"},{"location":"api/data/bird_mae/#saev.data.bird_mae.transform","title":"<code>transform(waveform)</code>","text":"<p>waveform: 1D tensor [samples] returns: 2D tensor [512, 128] matching HF's feature extractor output</p> Source code in <code>src/saev/data/bird_mae.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef transform(waveform: Float[np.ndarray, \" samples\"]) -&gt; Float[Tensor, \"time mels\"]:\n    \"\"\"\n    waveform: 1D tensor [samples]\n    returns: 2D tensor [512, 128] matching HF's feature extractor output\n    \"\"\"\n    import torchaudio.compliance.kaldi\n\n    waveform = torch.from_numpy(waveform).to(torch.float32)\n    (n_samples,) = waveform.shape\n    # 1) pad/truncate to exactly 5 s\n    max_len = BIRDMAE_SR_HZ * BIRDMAE_CLIP_SEC\n    if n_samples &lt; max_len:\n        pad = max_len - n_samples\n        waveform = F.pad(waveform, (0, pad))\n    else:\n        waveform = waveform[:max_len]\n\n    # 2) mean-center (per clip)\n    waveform = waveform - waveform.mean(dim=0, keepdim=True)\n\n    # 3) Kaldi fbank: [T, 128]\n    fb = torchaudio.compliance.kaldi.fbank(\n        waveform[None, :],\n        htk_compat=True,\n        sample_frequency=BIRDMAE_SR_HZ,\n        use_energy=False,\n        window_type=\"hanning\",\n        num_mel_bins=BIRDMAE_N_MELS,\n        dither=0.0,\n        frame_shift=10.0,\n    )  # [T, 128]\n\n    # 4) pad to 512 frames with min value\n    t, _ = fb.shape\n    if t &lt; BIRDMAE_TARGET_T:\n        diff = BIRDMAE_TARGET_T - t\n        min_val = fb.min()\n        fb = F.pad(fb, (0, 0, 0, diff), value=min_val.item())\n    elif t &gt; BIRDMAE_TARGET_T:\n        fb = fb[:BIRDMAE_TARGET_T]\n\n    fb = (fb - BIRDMAE_MEAN) / (BIRDMAE_STD * 2.0)\n\n    assert fb.shape == (BIRDMAE_TARGET_T, 
BIRDMAE_N_MELS), fb.shape\n\n    return fb\n</code></pre>"},{"location":"api/data/buffers/","title":"saev.data.buffers","text":""},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer","title":"<code>ReservoirBuffer(capacity, shape, *, dtype=torch.float32, meta_shape=(2,), meta_dtype=torch.int32, seed=0, collate_fn=None)</code>","text":"<p>Pool of (tensor, meta) pairs. Multiple producers call put(batch_x, batch_meta). Multiple consumers call get(batch_size) -&gt; (x, meta). Random order, each sample delivered once, blocking semantics.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(\n    self,\n    capacity: int,\n    shape: tuple[int, ...],\n    *,\n    dtype: torch.dtype = torch.float32,\n    meta_shape: tuple[int, ...] = (2,),\n    meta_dtype: torch.dtype = torch.int32,\n    seed: int = 0,\n    collate_fn: collections.abc.Callable | None = None,\n):\n    self.capacity = capacity\n    self._empty = 123456789\n\n    self.data = torch.full((capacity, *shape), self._empty, dtype=dtype)\n    self.data.share_memory_()\n\n    self.meta = torch.full((capacity, *meta_shape), self._empty, dtype=meta_dtype)\n    self.meta.share_memory_()\n\n    self.ctx = mp.get_context()\n\n    self.size = self.ctx.Value(\"L\", 0)  # current live items\n    self.lock = self.ctx.Lock()  # guards size+swap\n    self.free = self.ctx.Semaphore(capacity)\n    self.full = self.ctx.Semaphore(0)\n    # Each process has its own RNG.\n    self.rng = np.random.default_rng(seed)\n\n    self.collate_fn = collate_fn\n\n    self.logger = logging.getLogger(f\"reservoir({os.getpid()})\")\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n  
  try:\n        self.data.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.ReservoirBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return self.size.value\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer","title":"<code>RingBuffer(slots, shape, dtype)</code>","text":"<p>Fixed-capacity, multiple-producer / multiple-consumer queue backed by a shared-memory tensor.</p>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer--parameters","title":"Parameters","text":"<p>slots  : int           capacity in number of items (tensor rows) shape  : tuple[int]    shape of one item, e.g. 
(batch, dim) dtype  : torch.dtype   tensor dtype</p> <p>put(tensor)  : blocks if full get() -&gt; tensor  : blocks if empty qsize() -&gt; int        advisory size (approximate) close()               frees shared storage (call in the main process)</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def __init__(self, slots: int, shape: tuple[int, ...], dtype: torch.dtype):\n    assert slots &gt; 0, \"slots must be positive\"\n    self.slots = slots\n    # 123456789 -&gt; Should make you very worried.\n    self.buf = torch.full((slots, *shape), 123456789, dtype=dtype)\n    self.buf.share_memory_()\n\n    ctx = mp.get_context()  # obeys the global start method (\"spawn\")\n\n    # shared, lock-free counters\n    self.head = ctx.Value(\"L\", 0, lock=False)  # next free slot\n    self.tail = ctx.Value(\"L\", 0, lock=False)  # next occupied slot\n\n    # semaphores for blocking semantics\n    self.free = ctx.Semaphore(slots)  # initially all slots free\n    self.full = ctx.Semaphore(0)  # no filled slots yet\n\n    # one mutex for pointer updates\n    self.mutex = ctx.Lock()\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.close","title":"<code>close()</code>","text":"<p>Release the shared-memory backing store (call once in the parent).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def close(self) -&gt; None:\n    \"\"\"Release the shared-memory backing store (call once in the parent).\"\"\"\n    try:\n        self.buf.untyped_storage()._free_shared_mem()\n    except (AttributeError, FileNotFoundError):\n        pass  # already freed or never allocated\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.fill","title":"<code>fill()</code>","text":"<p>Approximate proportion of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def fill(self) -&gt; float:\n    \"\"\"Approximate proportion of filled slots (race-safe enough for 
tests).\"\"\"\n    return self.qsize() / self.capacity\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.get","title":"<code>get()</code>","text":"<p>Return a view of the next item; blocks if the queue is empty.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def get(self) -&gt; torch.Tensor:\n    \"\"\"Return a view of the next item; blocks if the queue is empty.\"\"\"\n    self.full.acquire()  # wait for data\n    with self.mutex:  # exclusive update of tail\n        idx = self.tail.value % self.slots\n        out = self.buf[idx].clone()\n        self.tail.value += 1\n    self.free.release()  # signal one more free slot\n    return out\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.put","title":"<code>put(tensor)</code>","text":"<p>Copy <code>tensor</code> into the next free slot; blocks if the queue is full.</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def put(self, tensor: torch.Tensor) -&gt; None:\n    \"\"\"Copy `tensor` into the next free slot; blocks if the queue is full.\"\"\"\n    if tensor.shape != self.buf.shape[1:] or tensor.dtype != self.buf.dtype:\n        raise ValueError(\"tensor shape / dtype mismatch\")\n\n    self.free.acquire()  # wait for a free slot\n    with self.mutex:  # exclusive update of head\n        idx = self.head.value % self.slots\n        self.buf[idx].copy_(tensor)\n        self.head.value += 1\n    self.full.release()  # signal there is data\n</code></pre>"},{"location":"api/data/buffers/#saev.data.buffers.RingBuffer.qsize","title":"<code>qsize()</code>","text":"<p>Approximate number of filled slots (race-safe enough for tests).</p> Source code in <code>src/saev/data/buffers.py</code> <pre><code>def qsize(self) -&gt; int:\n    \"\"\"Approximate number of filled slots (race-safe enough for tests).\"\"\"\n    return (self.head.value - self.tail.value) % (1 &lt;&lt; 
64)\n</code></pre>"},{"location":"api/data/clip/","title":"saev.data.clip","text":""},{"location":"api/data/clip/#saev.data.clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    import open_clip\n\n    from .. import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n        _, ckpt = ckpt.split(\"hf-hub:\")\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n\n    assert not isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/clip/#saev.data.clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Get patch size for CLIP models.</p>"},{"location":"api/data/clip/#saev.data.clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    import open_clip\n\n    from .. 
import helpers\n\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/datasets/","title":"saev.data.datasets","text":""},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025","title":"<code>BirdClef2025(root=pathlib.Path('data/birdclef-2025'), split='train_audio')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for BirdCLEF 2025 dataset, filtering to only bird species (Aves).</p> <p>See https://www.kaggle.com/competitions/birdclef-2025/data for more information.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of bird audio samples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.root","title":"<code>root = pathlib.Path('data/birdclef-2025')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory containing the BirdCLEF 2025 data.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025.split","title":"<code>split = 'train_audio'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which data split to use.</p>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset","title":"<code>BirdClef2025Dataset(cfg, *, audio_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset for BirdCLEF 2025 filtered to bird species only (class_name == 'Aves').</p> Source code in 
<code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: BirdClef2025,\n    *,\n    audio_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    import polars as pl\n\n    self.cfg = cfg\n    self.audio_transform = audio_transform\n    self.sample_transform = sample_transform\n\n    # Load taxonomy and filter to birds only\n    taxonomy = pl.read_csv(cfg.root / \"taxonomy.csv\", infer_schema_length=None)\n    taxonomy = taxonomy.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n    birds = taxonomy.filter(pl.col(\"class_name\") == \"Aves\")\n    bird_labels = set(birds[\"primary_label\"].to_list())\n\n    # Build label -&gt; target mapping from bird species only\n    sorted_labels = sorted(bird_labels)\n    self.label_to_target = {label: i for i, label in enumerate(sorted_labels)}\n    self.target_to_label = {i: label for label, i in self.label_to_target.items()}\n\n    if cfg.split == \"train_audio\":\n        train = pl.read_csv(cfg.root / \"train.csv\", infer_schema_length=None)\n        train = train.with_columns(pl.col(\"primary_label\").cast(pl.Utf8))\n        train_birds = train.filter(pl.col(\"primary_label\").is_in(bird_labels))\n        self.samples = [\n            {\"label\": row[\"primary_label\"], \"filename\": row[\"filename\"]}\n            for row in train_birds.iter_rows(named=True)\n        ]\n    elif cfg.split == \"train_soundscapes\":\n        soundscapes_dpath = cfg.root / \"train_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    elif cfg.split == \"test_soundscapes\":\n        soundscapes_dpath = cfg.root / \"test_soundscapes\"\n        self.samples = [\n            {\"label\": None, \"filename\": f.name}\n            for f in sorted(soundscapes_dpath.iterdir())\n            if f.suffix == \".ogg\"\n        ]\n    else:\n        
tp.assert_never(cfg.split)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.BirdClef2025Dataset.n_classes","title":"<code>n_classes</code>  <code>property</code>","text":"<p>Number of bird species.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10","title":"<code>Cifar10(name='uoft-cs/cifar10', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace CIFAR-10.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.name","title":"<code>name = 'uoft-cs/cifar10'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.root","title":"<code>root</code>  <code>property</code>","text":"<p>Dummy path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Cifar10.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. 
Can be 'train' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig","title":"<code>DatasetConfig</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Abstract base class for dataset configurations.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.n_examples","title":"<code>n_examples</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Number of examples in the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.DatasetConfig.root","title":"<code>root</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg","title":"<code>FakeImg(n_examples=10)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg","title":"<code>FakeImgSeg(n_examples=10, content_tokens_per_example=16, n_classes=3, bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Tiny synthetic segmentation dataset for tests.</p> <p>Generates dummy RGB images and pixel-level segmentation masks, mimicking the behavior of real segmentation datasets like ImgSegFolder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which class index is considered background.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.content_tokens_per_example","title":"<code>content_tokens_per_example = 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per 
example.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_classes","title":"<code>n_classes = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of segmentation classes.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.n_examples","title":"<code>n_examples = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of examples.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSeg.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.FakeImgSegDataset","title":"<code>FakeImgSegDataset(cfg, *, img_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Synthetic segmentation dataset providing pixel-level segmentation masks.</p> <p>Mimics ImgSegFolderDataset by providing:</p> <ul> <li>image: a dummy RGB PIL image</li> <li>segmentation: a PIL image with pixel-level class labels</li> <li>index, target, label</li> </ul> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(\n    self,\n    cfg: FakeImgSeg,\n    *,\n    img_transform=None,\n    mask_transform=None,\n    sample_transform=None,\n):\n    self.cfg = cfg\n    self.img_transform = img_transform\n    self.mask_transform = mask_transform\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet","title":"<code>Imagenet(name='ILSVRC/imagenet-1k', split='train')</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for HuggingFace Imagenet.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of images in the dataset. 
Calculated on the fly, but is non-trivial to calculate because it requires loading the dataset. If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.name","title":"<code>name = 'ILSVRC/imagenet-1k'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset name on HuggingFace. Don't need to change this..</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.root","title":"<code>root</code>  <code>property</code>","text":"<p>Root directory path for the dataset.</p>"},{"location":"api/data/datasets/#saev.data.datasets.Imagenet.split","title":"<code>split = 'train'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dataset split. For the default ImageNet-1K dataset, can either be 'train', 'validation' or 'test'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder","title":"<code>ImgFolder(root=pathlib.Path('./data/split'))</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p> <p>Configuration for a generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. Calculated on the fly, but is non-trivial to calculate because it requires walking the directory structure. 
If you need to reference this number very often, cache it in a local variable.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolder.root","title":"<code>root = pathlib.Path('./data/split')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored. Can be a glob pattern to match multiple directories.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset","title":"<code>ImgFolderDataset(*args, sample_transform=None, **kwargs)</code>","text":"<p>               Bases: <code>ImageFolder</code></p> <p>A generic image folder dataset that matches the structure used in PyTorch's ImageFolder.</p> <p>The datset must be laid out in:</p> <pre><code>root/class1/image1.png\nroot/class1/helloworld.jpg\n...\nroot/classN/123.jpeg\nroot/classN/abc.webp\n</code></pre> <p>If you don't have a class structure, you can add a dummy \"all\" folder instead of a class folder.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __init__(self, *args, sample_transform: Callable | None = None, **kwargs):\n    super().__init__(*args, **kwargs)\n    self.sample_transform = sample_transform\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgFolderDataset.__getitem__","title":"<code>__getitem__(index)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>index</code> <code>int</code> <p>Index</p> required <p>Returns:</p> Type Description <code>dict[str, object]</code> <p>dict with keys 'data', 'index', 'target' and 'label'.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>def __getitem__(self, index: int) -&gt; dict[str, object]:\n    \"\"\"\n    Args:\n        index: Index\n\n    Returns:\n        dict with keys 'data', 'index', 'target' and 'label'.\n    \"\"\"\n    path, target = self.samples[index]\n    image = self.loader(path)\n    if self.transform is not None:\n        image = self.transform(image)\n    if 
self.target_transform is not None:\n        target = self.target_transform(target)\n\n    sample = {\n        \"data\": image,\n        \"target\": target,\n        \"label\": self.classes[target],\n        \"index\": index,\n    }\n\n    if self.sample_transform is not None:\n        sample = self.sample_transform(sample)\n\n    return sample\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder","title":"<code>ImgSegFolder(root=pathlib.Path('./data/segdataset'), split='training', labels_csv='labels.csv', bg_label=0)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>DatasetConfig</code></p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.bg_label","title":"<code>bg_label = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Background label.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.labels_csv","title":"<code>labels_csv = 'labels.csv'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>CSV file with columns: stem,label1,label2,... First column must be 'stem'.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.n_examples","title":"<code>n_examples</code>  <code>property</code>","text":"<p>Number of examples in the dataset. 
Calculated on the fly by counting image files in root/images/split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.root","title":"<code>root = pathlib.Path('./data/segdataset')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where the class folders with images are stored.</p>"},{"location":"api/data/datasets/#saev.data.datasets.ImgSegFolder.split","title":"<code>split = 'training'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data split.</p>"},{"location":"api/data/datasets/#saev.data.datasets.get_dataset","title":"<code>get_dataset(cfg, *, data_transform=None, mask_transform=None, sample_transform=None)</code>","text":"<p>Gets the dataset for the current experiment; delegates construction to dataset-specific functions.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Config</code> <p>Config for the dataset.</p> required <code>data_tr</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> required <code>mask_tr</code> <p>Transform to be applied to masks.</p> required <code>dict_tr</code> <p>Transform to be applied to the entire sample dict.</p> required <p>Returns:     A dataset that has dictionaries with <code>'data'</code>, <code>'index'</code>, <code>'target'</code>, and <code>'label'</code> keys containing examples.</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef get_dataset(\n    cfg: Config,\n    *,\n    data_transform: Callable = None,\n    mask_transform: Callable | None = None,\n    sample_transform: Callable | None = None,\n):\n    \"\"\"\n    Gets the dataset for the current experiment; delegates construction to dataset-specific functions.\n\n    Args:\n        cfg: Config for the dataset.\n        data_tr: Transform to be applied to each 'data' key (typically the raw data).\n        mask_tr: Transform to be applied to masks.\n        dict_tr: Transform to be applied 
to the entire sample dict.\n    Returns:\n        A dataset that has dictionaries with `'data'`, `'index'`, `'target'`, and `'label'` keys containing examples.\n    \"\"\"\n    # TODO: Can we reduce duplication? Or is it nice to see that there is no magic here?\n    if isinstance(cfg, Imagenet):\n        return ImagenetDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, Cifar10):\n        return Cifar10Dataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, ImgSegFolder):\n        return ImgSegFolderDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, ImgFolder):\n        ds = [\n            ImgFolderDataset(\n                root, transform=data_transform, sample_transform=sample_transform\n            )\n            for root in glob.glob(str(cfg.root), recursive=True)\n        ]\n        if len(ds) == 1:\n            return ds[0]\n        else:\n            return torch.utils.data.ConcatDataset(ds)\n    elif isinstance(cfg, FakeImg):\n        return FakeImgDataset(\n            cfg, img_transform=data_transform, sample_transform=sample_transform\n        )\n    elif isinstance(cfg, FakeImgSeg):\n        return FakeImgSegDataset(\n            cfg,\n            img_transform=data_transform,\n            mask_transform=mask_transform,\n            sample_transform=sample_transform,\n        )\n    elif isinstance(cfg, BirdClef2025):\n        return BirdClef2025Dataset(\n            cfg, audio_transform=data_transform, sample_transform=sample_transform\n        )\n    else:\n        tp.assert_never(cfg)\n</code></pre>"},{"location":"api/data/datasets/#saev.data.datasets.is_img_seg_dataset","title":"<code>is_img_seg_dataset(data_cfg)</code>","text":"<p>Check if a dataset 
configuration is for an image segmentation dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data_cfg</code> <code>DatasetConfig</code> <p>Dataset configuration</p> required <p>Returns:</p> Type Description <code>bool</code> <p>True if this is an image segmentation dataset that should have labels.bin</p> Source code in <code>src/saev/data/datasets.py</code> <pre><code>@beartype.beartype\ndef is_img_seg_dataset(data_cfg: DatasetConfig) -&gt; bool:\n    \"\"\"\n    Check if a dataset configuration is for an image segmentation dataset.\n\n    Args:\n        data_cfg: Dataset configuration\n\n    Returns:\n        True if this is an image segmentation dataset that should have labels.bin\n    \"\"\"\n    return isinstance(data_cfg, (FakeImgSeg, ImgSegFolder))\n</code></pre>"},{"location":"api/data/dinov2/","title":"saev.data.dinov2","text":""},{"location":"api/data/dinov3/","title":"saev.data.dinov3","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config","title":"<code>Config(img_size=224, patch_size=16, in_chans=3, pos_embed_rope_base=100.0, pos_embed_rope_min_period=None, pos_embed_rope_max_period=None, pos_embed_rope_normalize_coords='separate', pos_embed_rope_dtype='bf16', embed_dim=768, depth=12, num_heads=12, ffn_ratio=4.0, qkv_bias=True, ffn_layer='mlp', ffn_bias=True, proj_bias=True, n_storage_tokens=0, mask_k_bias=False, untie_global_and_local_cls_norm=False, device=None)</code>  <code>dataclass</code>","text":""},{"location":"api/data/dinov3/#saev.data.dinov3.Config.depth","title":"<code>depth = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of transformer blocks.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.device","title":"<code>device = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Device for tensor operations.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.embed_dim","title":"<code>embed_dim = 768</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Embedding dimension for transformer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_bias","title":"<code>ffn_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in feed-forward network.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_layer","title":"<code>ffn_layer = 'mlp'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Type of feed-forward network layer.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.ffn_ratio","title":"<code>ffn_ratio = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Feed-forward network expansion ratio.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.img_size","title":"<code>img_size = 224</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Image width and height in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.in_chans","title":"<code>in_chans = 3</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of input image channels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.mask_k_bias","title":"<code>mask_k_bias = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to mask K bias in attention.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.n_storage_tokens","title":"<code>n_storage_tokens = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of storage/register tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.num_heads","title":"<code>num_heads = 12</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of attention heads.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.patch_size","title":"<code>patch_size = 16</code>  
<code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of each patch in pixels.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_base","title":"<code>pos_embed_rope_base = 100.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Base frequency for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_dtype","title":"<code>pos_embed_rope_dtype = 'bf16'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data type for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_max_period","title":"<code>pos_embed_rope_max_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_min_period","title":"<code>pos_embed_rope_min_period = None</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Minimum period for RoPE positional encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.pos_embed_rope_normalize_coords","title":"<code>pos_embed_rope_normalize_coords = 'separate'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Coordinate normalization method for RoPE encoding.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.proj_bias","title":"<code>proj_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in output projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.qkv_bias","title":"<code>qkv_bias = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use bias in QKV projection.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.Config.untie_global_and_local_cls_norm","title":"<code>untie_global_and_local_cls_norm = 
False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use separate norms for global and local CLS tokens.</p>"},{"location":"api/data/dinov3/#saev.data.dinov3.PatchEmbed","title":"<code>PatchEmbed(img_size=224, patch_size=16, in_chans=3, embed_dim=768, flatten_embedding=True)</code>","text":"<p>               Bases: <code>Module</code></p> <p>2D image to patch embedding: (B,C,H,W) -&gt; (B,N,D)</p> <p>Parameters:</p> Name Type Description Default <code>img_size</code> <code>int | tuple[int, int]</code> <p>Image size.</p> <code>224</code> <code>patch_size</code> <code>int | tuple[int, int]</code> <p>Patch token size.</p> <code>16</code> <code>in_chans</code> <code>int</code> <p>Number of input image channels.</p> <code>3</code> <code>embed_dim</code> <code>int</code> <p>Number of linear projection output channels.</p> <code>768</code> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>def __init__(\n    self,\n    img_size: int | tuple[int, int] = 224,\n    patch_size: int | tuple[int, int] = 16,\n    in_chans: int = 3,\n    embed_dim: int = 768,\n    flatten_embedding: bool = True,\n) -&gt; None:\n    super().__init__()\n\n    image_hw = make_2tuple(img_size)\n    patch_hw = make_2tuple(patch_size)\n\n    self.image_hw = image_hw\n    self.patch_hw = patch_hw\n\n    self.in_chans = in_chans\n    self.embed_dim = embed_dim\n\n    self.proj = nn.Conv2d(\n        in_chans, embed_dim, kernel_size=patch_hw, stride=patch_hw\n    )\n    self.k = patch_hw[0]\n    assert self.proj.kernel_size == (self.k, self.k)\n    assert self.proj.stride == (self.k, self.k)\n    assert self.proj.padding == (0, 0)\n    assert self.proj.groups == 1\n    assert self.proj.dilation == (1, 1)\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/dinov3.py</code> 
<pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    name = self._parse_name(ckpt)\n    self.model = load(name, ckpt)\n\n    self._ckpt = name\n    self.logger = logging.getLogger(f\"dinov3/{name}\")\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    import functools\n\n    return functools.partial(\n        transforms.resize_to_patch_grid,\n        p=int(16 * scale),\n        n=n_patches_per_img,\n        resample=resample,\n    )\n</code></pre>"},{"location":"api/data/dinov3/#saev.data.dinov3.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/dinov3.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    img_transform = v2.Compose([\n        transforms.FlexResize(patch_size=16, n_patches=n_patches_per_img),\n        v2.ToImage(),\n        v2.ToDtype(torch.float32, scale=True),\n        v2.Normalize(mean=[0.4850, 0.4560, 0.4060], std=[0.2290, 0.2240, 0.2250]),\n    ])\n    sample_transform = transforms.Patchify(\n        patch_size=16, 
n_patches=n_patches_per_img\n    )\n    return img_transform, sample_transform\n</code></pre>"},{"location":"api/data/fake_clip/","title":"saev.data.fake_clip","text":"<p>Fake CLIP model for testing with tiny-open-clip-model.</p> <p>This module provides a test-only vision transformer that works with the tiny-open-clip-model from HuggingFace, which uses 8x8 images and 2x2 patches instead of the standard 224x224 images with 16x16 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Transformer</code>, <code>Module</code></p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    # Only support the tiny test model\n    assert ckpt == \"hf-hub:hf-internal-testing/tiny-open-clip-model\", (\n        f\"FakeClip only supports tiny-open-clip-model, got {ckpt}\"\n    )\n\n    clip, _ = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    self._ckpt = ckpt\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model.eval()\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.patch_size","title":"<code>patch_size</code>  <code>property</code>","text":"<p>Tiny model uses 2x2 patches.</p>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for tiny model (8x8 images).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for tiny model (8x8 
images).\"\"\"\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # Tiny model uses 8x8 images\n        size_px = (int(8 * scale), int(8 * scale))\n        return img.resize(size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/fake_clip/#saev.data.fake_clip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/fake_clip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    _, img_transform = open_clip.create_model_from_pretrained(\n        ckpt, cache_dir=helpers.get_cache_dir()\n    )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/indexed/","title":"saev.data.indexed","text":""},{"location":"api/data/indexed/#saev.data.indexed.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset","title":"<code>Dataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/indexed/#saev.data.indexed.Dataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/models/","title":"saev.data.models","text":""},{"location":"api/data/models/#saev.data.models.Transformer","title":"<code>Transformer</code>","text":"<p>               Bases: <code>ABC</code></p> <p>Protocol defining the interface for all Transformer models.</p>"},{"location":"api/data/models/#saev.data.models.Transformer.patch_size","title":"<code>patch_size</code>  <code>abstractmethod</code> <code>property</code>","text":"<p>Patch size in pixels (e.g., 14 or 16).</p>"},{"location":"api/data/models/#saev.data.models.Transformer.forward","title":"<code>forward(batch)</code>  <code>abstractmethod</code>","text":"<p>Run forward pass on batch of images.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef forward(\n    self, batch: Float[Tensor, \"batch 3 width height\"]\n) -&gt; Float[Tensor, \"batch patches dim\"]:\n    \"\"\"Run forward pass on batch of images.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_residuals","title":"<code>get_residuals()</code>  <code>abstractmethod</code>","text":"<p>Return the list of residual blocks/layers for hook registration.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_residuals(self) -&gt; list[torch.nn.Module]:\n    \"\"\"Return the list of residual blocks/layers for hook registration.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.get_token_i","title":"<code>get_token_i(content_tokens_per_example)</code>  
<code>abstractmethod</code>","text":"<p>Return indices for selecting relevant tokens from activations.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@abc.abstractmethod\ndef get_token_i(self, content_tokens_per_example: int) -&gt; slice | torch.Tensor:\n    \"\"\"Return indices for selecting relevant tokens from activations.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_resize","title":"<code>make_resize(ckpt, content_tokens_per_example, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_resize(\n    ckpt: str,\n    content_tokens_per_example: int,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.Transformer.make_transforms","title":"<code>make_transforms(ckpt, content_tokens_per_example)</code>  <code>abstractmethod</code> <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (data_transform, dict_transform | None).</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@staticmethod\n@abc.abstractmethod\ndef make_transforms(\n    ckpt: str, content_tokens_per_example: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (data_transform, dict_transform | None).\"\"\"\n</code></pre>"},{"location":"api/data/models/#saev.data.models.list_families","title":"<code>list_families()</code>","text":"<p>List all ViT family names.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>def list_families() -&gt; list[str]:\n    \"\"\"List all ViT family names.\"\"\"\n    return list(_global_model_registry.keys())\n</code></pre>"},{"location":"api/data/models/#saev.data.models.load_model_cls","title":"<code>load_model_cls(family)</code>","text":"<p>Load a transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef load_model_cls(family: str) -&gt; type[Transformer]:\n    \"\"\"Load a transformer family's class.\"\"\"\n    if family not in _global_model_registry:\n        raise ValueError(f\"Family '{family}' not found.\")\n\n    return _global_model_registry[family]\n</code></pre>"},{"location":"api/data/models/#saev.data.models.register_family","title":"<code>register_family(cls)</code>","text":"<p>Register a new transformer family's class.</p> Source code in <code>src/saev/data/models.py</code> <pre><code>@beartype.beartype\ndef register_family(cls: type[Transformer]):\n    \"\"\"Register a new transformer family's class.\"\"\"\n    if cls.family in _global_model_registry:\n        
logger.warning(\"Overwriting key '%s' in registry.\", cls.family)\n    _global_model_registry[cls.family] = cls\n</code></pre>"},{"location":"api/data/ordered/","title":"saev.data.ordered","text":"<p>Ordered (sequential) dataloader for activation data.</p> <p>This module provides a high-throughput dataloader that reads activation data from disk shards in sequential order, without shuffling. The implementation uses a single-threaded manager process to ensure data is delivered in the exact order it appears on disk.</p> <p>Patch labels are provided if there is a labels.bin file on disk.</p> <p>See the design decisions in src/saev/data/performance.md.</p> Usage <p>cfg = Config(shards=\"./shards\", layer=13, batch_size=4096) dataloader = DataLoader(cfg) for batch in dataloader: ...     activations = batch[\"act\"]  # [batch_size, d_model] ...     image_indices = batch[\"example_idx\"]  # [batch_size] ...     patch_indices = batch[\"token_idx\"]  # [batch_size] ...     patch_labels = batch[\"patch_labels\"]  # [batch_size]</p>"},{"location":"api/data/ordered/#saev.data.ordered.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/ordered/#saev.data.ordered.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/pe/","title":"saev.data.pe","text":"<p>Perception Encoder (PE) models from Meta (Bolya et al., 2025).</p> <p>PE-Core: CLIP-style model for language alignment. PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Both are available via timm.</p>"},{"location":"api/data/pe/#saev.data.pe.Core","title":"<code>Core(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Core: CLIP-style model for language alignment.</p> <p>Available checkpoints: - vit_pe_core_large_patch14_336.fb (L/14, 336px) - vit_pe_core_base_patch16_224.fb (B/16, 224px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/pe/#saev.data.pe.Spatial","title":"<code>Spatial(ckpt)</code>","text":"<p>               Bases: <code>_Base</code></p> <p>PE-Spatial: Dense prediction model distilled from SAM 2.1.</p> <p>Available checkpoints: - vit_pe_spatial_large_patch14_448.fb (L/14, 448px) 
- vit_pe_spatial_base_patch16_512.fb (B/16, 512px)</p> Source code in <code>src/saev/data/pe.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n    self._ckpt = ckpt\n    self.logger = logging.getLogger(f\"{self.family}/{ckpt}\")\n\n    # Load model without classifier head, outputting patch features\n    self.model = timm.create_model(ckpt, pretrained=True, num_classes=0)\n    self.model.eval()\n\n    # Get data config for transforms\n    self._data_config = timm.data.resolve_model_data_config(self.model)\n</code></pre>"},{"location":"api/data/saev.data/","title":"saev.data","text":""},{"location":"api/data/saev.data/#saev.data.IndexedConfig","title":"<code>IndexedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, debug=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading indexed activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset","title":"<code>IndexedDataset(cfg)</code>","text":"<p>               Bases: <code>Dataset</code></p> <p>Dataset of activations from disk.</p> <p>Attributes:</p> Name Type Description <code>cfg</code> <code>Config</code> <p>Configuration set via CLI args.</p> <code>md</code> <code>Metadata</code> <p>Activations metadata; automatically loaded from disk.</p> <code>layer_idx</code> <code>int</code> <p>Layer index into the shards if we are choosing a specific layer.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    # Check if labels.bin exists\n    labels_path = os.path.join(self.cfg.shards, \"labels.bin\")\n    self.labels_mmap = None\n    if os.path.exists(labels_path):\n        self.labels_mmap = np.memmap(\n            labels_path,\n            mode=\"r\",\n            dtype=np.uint8,\n            shape=(self.md.n_examples, self.md.content_tokens_per_example),\n        )\n\n    self.index_map = shards.IndexMap(self.md, self.cfg.tokens, self.cfg.layer)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.d_model","title":"<code>d_model</code>  <code>property</code>","text":"<p>Dimension of the underlying vision transformer's embedding space.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.Example","title":"<code>Example</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual 
example.</p>"},{"location":"api/data/saev.data/#saev.data.IndexedDataset.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/indexed.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    return len(self.index_map)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> <p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels 
were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/saev.data/#saev.data.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/saev.data/#saev.data.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / 
hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/ as described in disk-layout.md. 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedConfig","title":"<code>OrderedConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-2, batch_size=1024 * 16, batch_timeout_s=30.0, drop_last=False, buffer_size=64, debug=False, log_every_s=30.0)</code>  <code>dataclass</code>","text":"<p>Configuration for loading ordered (non-shuffled) activation data from disk</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['content']</code> <p>Which kinds of tokens to use. 'special' indicates the special tokens token (if any). 'content' returns content tokens. 'all' returns both content and special tokens.</p> <code>layer</code> <code>int | Literal['all']</code> <p>Which ViT layer(s) to read from disk. <code>-2</code> selects the second-to-last layer. 
<code>\"all\"</code> enumerates every recorded layer.</p> <code>batch_size</code> <code>int</code> <p>Batch size.</p> <code>batch_timeout_s</code> <code>float</code> <p>How long to wait for at least one batch.</p> <code>drop_last</code> <code>bool</code> <p>Whether to drop the last batch if it's smaller than the others.</p> <code>buffer_size</code> <code>int</code> <p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p> <code>debug</code> <code>bool</code> <p>Whether the dataloader process should log debug messages.</p> <code>log_every_s</code> <code>float</code> <p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader","title":"<code>OrderedDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that reads data from disk shards in order (no shuffling).</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    self.md = shards.Metadata.load(self.cfg.shards)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self.cfg.shards)\n    shard_info.validate(self.cfg.shards)\n\n    self.logger = logging.getLogger(\"ordered.DataLoader\")\n    self.ctx = mp.get_context()\n    self.manager_proc = None\n    self.batch_queue = None\n    self.stop_event = None\n    self._n_samples = self._calculate_n_samples()\n    self.logger.info(\n        \"Initialized ordered.DataLoader with %d samples. 
(debug=%s)\",\n        self.n_samples,\n        self.cfg.debug,\n    )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches in order.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterable[ExampleBatch]:\n    \"\"\"Yields batches in order.\"\"\"\n    self._start_manager()\n    n = 0\n\n    try:\n        while n &lt; self.n_samples:\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                batch = self.batch_queue.get(timeout=self.cfg.batch_timeout_s)\n                actual_batch_size = batch[\"act\"].shape[0]\n\n                # Handle drop_last\n                if (\n                    self.cfg.drop_last\n                    and actual_batch_size &lt; self.cfg.batch_size\n                    and n + actual_batch_size &gt;= self.n_samples\n                ):\n                    break\n\n                n += actual_batch_size\n                yield self.ExampleBatch(**batch)\n                continue\n            except queue.Empty:\n                self.logger.info(\n                    \"Did not get a batch from manager process in %.1fs seconds.\",\n                    self.cfg.batch_timeout_s,\n                )\n            except FileNotFoundError:\n                self.logger.info(\"Manager process (probably) closed.\")\n                continue\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died 
unexpectedly after {n}/{self.n_samples} samples.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.OrderedDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/ordered.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    if self.cfg.drop_last:\n        return self.n_samples // self.cfg.batch_size\n    else:\n        return math.ceil(self.n_samples / self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig","title":"<code>ShuffledConfig(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledConfig.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader","title":"<code>ShuffledDataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = 
None\n    self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n          
  self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.ShuffledDataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/saev.data/#saev.data.make_ordered_config","title":"<code>make_ordered_config(shuffled_cfg, **overrides)</code>","text":"<p>Create an <code>OrderedConfig</code> from a <code>ShuffledConfig</code>, with optional overrides.</p> <p>Defaults come from <code>shuffled_cfg</code> for fields present in <code>OrderedConfig</code>, and <code>overrides</code> take precedence. Unknown override fields raise <code>TypeError</code> from the <code>OrderedConfig</code> constructor, mirroring <code>dataclasses.replace</code>.</p> Source code in <code>src/saev/data/__init__.py</code> <pre><code>@beartype.beartype\ndef make_ordered_config(\n    shuffled_cfg: ShuffledConfig, **overrides: object\n) -&gt; OrderedConfig:\n    \"\"\"Create an `OrderedConfig` from a `ShuffledConfig`, with optional overrides.\n\n    Defaults come from `shuffled_cfg` for fields present in `OrderedConfig`, and `overrides` take precedence. Unknown override fields raise `TypeError` from the `OrderedConfig` constructor, mirroring `dataclasses.replace`.\n    \"\"\"\n    params: dict[str, object] = {}\n    for f in dataclasses.fields(OrderedConfig):\n        name = f.name\n        if hasattr(shuffled_cfg, name):\n            params[name] = getattr(shuffled_cfg, name)\n    params.update(overrides)\n    return OrderedConfig(**params)\n</code></pre>"},{"location":"api/data/shards/","title":"saev.data.shards","text":"<p>Library code for reading and writing sharded activations to disk.</p>"},{"location":"api/data/shards/#saev.data.shards.Index","title":"<code>Index(*, idx, example_idx, content_token_idx, shard_idx, example_idx_in_shard, layer_idx_in_shard, token_idx_in_shard)</code>  <code>dataclass</code>","text":"<p>Attributes:</p> Name Type Description <code>idx</code> <code>int</code> <p>The index of the activation.</p> <code>example_idx</code> <code>int</code> <p>The index of the original example (image, audio clip etc).</p> <code>content_token_idx</code> 
<code>int</code> <p>The token's index within an example's content. -1 for all special tokens.</p> <code>shard_idx</code> <code>int</code> <p>The shard index.</p> <code>example_idx_in_shard</code> <code>int</code> <p>The example index along the examples axis in a shard.</p> <code>token_idx_in_shard</code> <code>int</code> <p>The token index along the tokens axis in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.IndexMap","title":"<code>IndexMap(md, tokens, layer)</code>","text":"<p>Attributes:</p> Name Type Description <code>md</code> <code>Metadata</code> <p>Metadata</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to load.</p> <code>layer</code> <code>int</code> <p>Which layer to load.</p> <code>layer_idx_lookup</code> <code>dict[int, int]</code> <p>The lookup from a transformer layer to the layer idx in the shard.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    md: Metadata,\n    tokens: tp.Literal[\"special\", \"content\", \"all\"],\n    layer: int | tp.Literal[\"all\"],\n):\n    if tokens == \"special\":\n        assert md.cls_token\n\n    self.md = md\n    self.tokens = tokens\n    self.layer = layer\n\n    if isinstance(layer, int):\n        err_msg = f\"No matche for layer; {layer} not in {md.layers}.\"\n        assert layer in md.layers, err_msg\n\n    self.layer_idx_lookup = {layer: i for i, layer in enumerate(md.layers)}\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.IndexMap.__len__","title":"<code>__len__()</code>","text":"<p>Dataset length depends on <code>patches</code> and <code>layer</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"\n    Dataset length depends on `patches` and `layer`.\n    \"\"\"\n    match (self.tokens, self.layer):\n        case (\"special\", \"all\"):\n            # Return a CLS token from a random example and random layer.\n          
  return self.md.n_examples * len(self.md.layers)\n        case (\"special\", int()):\n            # Return a CLS token from a random example and fixed layer.\n            return self.md.n_examples\n        case (\"content\", int()):\n            # Return a patch from a random example, fixed layer, and random patch.\n            return self.md.n_examples * self.md.content_tokens_per_example\n        case (\"content\", \"all\"):\n            # Return a patch from a random example, random layer and random patch.\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.content_tokens_per_example\n            )\n        case (\"all\", int()):\n            # Return a token from a random example, fixed layer, and random token (including special).\n            return self.md.n_examples * self.md.tokens_per_example\n        case (\"all\", \"all\"):\n            # Return a token from a random example, random layer and random token (including special).\n            return (\n                self.md.n_examples\n                * len(self.md.layers)\n                * self.md.tokens_per_example\n            )\n        case _:\n            tp.assert_never((self.cfg.tokens, self.cfg.layer))\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter","title":"<code>LabelsWriter(shards_dir, md)</code>","text":"<p>LabelsWriter handles writing patch-level segmentation labels to a single binary file.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>The shard directory; $SAEV_SCRATCH/saev/shards/&lt;hash&gt;</p> required <code>md</code> <code>Metadata</code> <p>The Metadata object.</p> required <p>Attributes:</p> Name Type Description <code>labels</code> <code>UInt8[ndarray, 'n_examples n_patches']</code> <p>The integer patch labels.</p> <code>labels_path</code> <code>Path</code> <p>Where the integer patch labels are stored.</p> <code>md</code> <code>Metadata</code> 
<p>The dataset metadata.</p> <code>has_written</code> <code>bool</code> <p>Whether we have written any data to <code>self.labels</code>.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_dir: pathlib.Path, md: Metadata):\n    assert disk.is_shards_dir(shards_dir)\n    self.logger = logging.getLogger(\"labels-writer\")\n    self.md = md\n    self.has_written = False\n\n    # Always create memory-mapped file for labels\n    # If nothing is written, it will be deleted in flush()\n    self.labels_path = shards_dir / \"labels.bin\"\n    self.labels = np.memmap(\n        self.labels_path,\n        mode=\"w+\",\n        dtype=np.uint8,\n        shape=(self.md.n_examples, self.md.content_tokens_per_example),\n    )\n    self.logger.info(\"Opened labels file '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.flush","title":"<code>flush()</code>","text":"<p>Flush the memory-mapped file to disk if anything was written.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def flush(self) -&gt; None:\n    \"\"\"Flush the memory-mapped file to disk if anything was written.\"\"\"\n    if self.has_written:\n        self.labels.flush()\n        self.logger.info(\"Flushed labels to '%s'.\", self.labels_path)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.LabelsWriter.write_batch","title":"<code>write_batch(batch_labels, start_idx)</code>","text":"<p>Write a batch of labels to the memory-mapped file.</p> <p>Parameters:</p> Name Type Description Default <code>batch_labels</code> <code>ndarray | Tensor</code> <p>Array of shape (batch_size, content_tokens_per_example) with uint8 dtype</p> required <code>start_idx</code> <code>int</code> <p>Starting index in the global labels array</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef write_batch(self, batch_labels: np.ndarray | Tensor, start_idx: int):\n    \"\"\"\n  
  Write a batch of labels to the memory-mapped file.\n\n    Args:\n        batch_labels: Array of shape (batch_size, content_tokens_per_example) with uint8 dtype\n        start_idx: Starting index in the global labels array\n    \"\"\"\n    # Convert to numpy if needed\n    if isinstance(batch_labels, torch.Tensor):\n        batch_labels = batch_labels.cpu().numpy()\n\n    batch_size = len(batch_labels)\n    assert start_idx + batch_size &lt;= self.md.n_examples\n    assert batch_labels.shape == (batch_size, self.md.content_tokens_per_example)\n    assert batch_labels.dtype == np.uint8\n\n    self.labels[start_idx : start_idx + batch_size] = batch_labels\n    self.has_written = True\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata","title":"<code>Metadata(*, family, ckpt, layers, content_tokens_per_example, cls_token, d_model, n_examples, max_tokens_per_shard, data, dataset, pixel_agg=PixelAgg.MAJORITY, dtype='float32', protocol='2.1')</code>  <code>dataclass</code>","text":"<p>Metadata for a sharded set of transformer activations.</p> <p>Parameters:</p> Name Type Description Default <code>family</code> <code>Literal['bird-mae', 'clip', 'dinov2', 'dinov3', 'fake-clip', 'pe-core', 'pe-spatial', 'siglip']</code> <p>The transformer family.</p> required <code>ckpt</code> <code>str</code> <p>The transformer checkpoint.</p> required <code>layers</code> <code>tuple[int, ...]</code> <p>Which layers were saved.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>The number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token as well.</p> required <code>d_model</code> <code>int</code> <p>Model hidden dimension.</p> required <code>n_examples</code> <code>int</code> <p>Number of examples.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>The maximum number of tokens per shard.</p> required <code>data</code> <code>str</code> 
<p>base64-encoded string of pickle.dumps(dataset).</p> required <code>dataset</code> <code>Path</code> <p>Absolute path to the root directory of the original dataset.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>(only for image segmentation datasets) how the pixel-level segmentation labels were aggregated to token-level labels.</p> <code>MAJORITY</code> <code>dtype</code> <code>Literal['float32']</code> <p>How activations are stored.</p> <code>'float32'</code> <code>protocol</code> <code>Literal['1.0.0', '1.1', '2.1']</code> <p>Protocol version.</p> <code>'2.1'</code>"},{"location":"api/data/shards/#saev.data.shards.Metadata.examples_per_shard","title":"<code>examples_per_shard</code>  <code>property</code>","text":"<p>The number of examples per shard based on the protocol.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of examples that fit in a shard.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.hash","title":"<code>hash</code>  <code>property</code>","text":"<p>First 8 bytes of a SHA256 hash of the metadata configuration.</p> <p>Returns:</p> Type Description <code>str</code> <p>Hexadecimal hash string uniquely identifying this configuration.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.n_shards","title":"<code>n_shards</code>  <code>property</code>","text":"<p>Total number of shards needed to store all examples.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of shards required.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.shard_shape","title":"<code>shard_shape</code>  <code>property</code>","text":"<p>Shape of each shard file.</p> <p>Returns:</p> Type Description <code>tuple[int, int, int, int]</code> <p>Tuple of (examples_per_shard, n_layers, tokens_per_example, d_model).</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.tokens_per_example","title":"<code>tokens_per_example</code>  <code>property</code>","text":"<p>Total number of tokens per example 
including [CLS] token if present.</p> <p>Returns:</p> Type Description <code>int</code> <p>Number of tokens plus one if [CLS] token is included.</p>"},{"location":"api/data/shards/#saev.data.shards.Metadata.dump","title":"<code>dump(shards_root)</code>","text":"<p>Dumps a Metadata object to a metadata.json file in shards_root / hash.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards as described in disk-layout.md.</p> required Source code in <code>src/saev/data/shards.py</code> <pre><code>def dump(self, shards_root: pathlib.Path):\n    \"\"\"\n    Dumps a Metadata object to a metadata.json file in shards_root / hash.\n\n    Args:\n        shards_root: Path to $SAEV_SCRATCH/saev/shards as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_root(shards_root)\n    (shards_root / self.hash).mkdir(exist_ok=True)\n    with open(shards_root / self.hash / \"metadata.json\", \"wb\") as fd:\n        helpers.jdump(self, fd, option=orjson.OPT_INDENT_2)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Metadata.load","title":"<code>load(shards_dir)</code>  <code>classmethod</code>","text":"<p>Loads a Metadata object from a metadata.json file in shards_dir.</p> <p>Parameters:</p> Name Type Description Default <code>shards_dir</code> <code>Path</code> <p>Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in disk-layout.md.</p> 
required Source code in <code>src/saev/data/shards.py</code> <pre><code>@classmethod\ndef load(cls, shards_dir: pathlib.Path) -&gt; tp.Self:\n    \"\"\"\n    Loads a Metadata object from a metadata.json file in shards_dir.\n\n    Args:\n        shards_dir: Path to $SAEV_SCRATCH/saev/shards/&lt;hash&gt; as described in [disk-layout.md](../../developers/disk-layout.md).\n    \"\"\"\n    assert disk.is_shards_dir(shards_dir)\n\n    with open(shards_dir / \"metadata.json\") as fd:\n        dct = json.load(fd)\n    dct[\"layers\"] = tuple(dct.pop(\"layers\"))\n    dct[\"dataset\"] = pathlib.Path(dct[\"dataset\"])\n    dct[\"pixel_agg\"] = PixelAgg(dct[\"pixel_agg\"])\n    return cls(**dct)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.PixelAgg","title":"<code>PixelAgg</code>","text":"<p>               Bases: <code>Enum</code></p> <p>How to aggregate pixel-level segmentation labels to token-level labels (only for image segmentation datasets).</p>"},{"location":"api/data/shards/#saev.data.shards.RecordedTransformer","title":"<code>RecordedTransformer(model, content_tokens_per_example, cls_token, layers)</code>","text":"<p>               Bases: <code>Module</code></p> <p>A wrapper around a transformer model that records intermediate layer activations during forward passes.</p> <p>Parameters:</p> Name Type Description Default <code>model</code> <code>Module</code> <p>The transformer model to wrap.</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether to record the [CLS] token in addition to content tokens.</p> required <code>layers</code> <code>Sequence[int]</code> <p>Which transformer layers to record activations from.</p> required <p>Attributes:</p> Name Type Description <code>model</code> <code>Module</code> <p>The wrapped transformer model.</p> <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per 
example.</p> <code>cls_token</code> <code>bool</code> <p>Whether the [CLS] token is included in recorded activations.</p> <code>layers</code> <code>Sequence[int]</code> <p>Tuple of layer indices being recorded.</p> <code>token_i</code> <code>slice</code> <p>Token indices to extract from model outputs.</p> <code>logger</code> <p>Logger instance for this recorder.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(\n    self,\n    model: torch.nn.Module,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    layers: Sequence[int],\n):\n    super().__init__()\n\n    self.model = model\n\n    self.content_tokens_per_example = content_tokens_per_example\n    self.cls_token = cls_token\n    self.layers = layers\n\n    self.token_i = model.get_token_i(content_tokens_per_example)\n\n    self._storage = None\n    self._i = 0\n\n    self.logger = logging.getLogger(f\"recorder({model.name})\")\n\n    for i in self.layers:\n        self.model.get_residuals()[i].register_forward_hook(self.hook)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.Shard","title":"<code>Shard(name, n_examples)</code>  <code>dataclass</code>","text":"<p>A single shard entry in shards.json, recording the filename and number of examples.</p> <p>Attributes:</p> Name Type Description <code>name</code> <code>str</code> <p>The filename of the shard (e.g., \"acts000000.bin\").</p> <code>n_examples</code> <code>int</code> <p>Number of examples stored in this shard.</p>"},{"location":"api/data/shards/#saev.data.shards.ShardInfo","title":"<code>ShardInfo(shards=list())</code>  <code>dataclass</code>","text":"<p>A container for shard metadata as recorded in shards.json.</p> <p>Parameters:</p> Name Type Description Default <code>shards</code> <code>list[Shard]</code> <p>A list of Shard objects.</p> <code>list()</code>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter","title":"<code>ShardWriter(shards_root, md)</code>","text":"<p>ShardWriter is a 
stateful object that handles sharded activation writing to disk.</p> <p>Parameters:</p> Name Type Description Default <code>shards_root</code> <code>Path</code> <p>The $SAEV_SCRATCH/saev/shards path.</p> required <code>md</code> <code>Metadata</code> <p>The Metadata object for these shards.</p> required <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>The $SAEV_SCRATCH/saev/shards/&lt;hash&gt; directory.</p> <code>shard</code> <code>int</code> <code>acts_path</code> <code>Path</code> <code>acts</code> <code>Float[ndarray, 'examples_per_shard n_layers all_patches d_model'] | None</code> <code>filled</code> <code>int</code> <code>labels_writer</code> <code>LabelsWriter</code> <p>The LabelsWriter writer.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __init__(self, shards_root: pathlib.Path, md: Metadata):\n    assert disk.is_shards_root(shards_root)\n    self.md = md\n\n    self.logger = logging.getLogger(\"shard-writer\")\n\n    self.shards_dir = shards_root / md.hash\n    self.shards_dir.mkdir(exist_ok=True)\n\n    # builder for shard manifest\n    self._shards: ShardInfo = ShardInfo()\n\n    # Always initialize labels writer (it handles non-seg datasets internally)\n    self.labels_writer = LabelsWriter(self.shards_dir, md)\n\n    self.shard = -1\n    self.acts = None\n    self.next_shard()\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__enter__","title":"<code>__enter__()</code>","text":"<p>Context manager entry.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __enter__(self):\n    \"\"\"Context manager entry.\"\"\"\n    return self\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.__exit__","title":"<code>__exit__(exc_type, exc_val, exc_tb)</code>","text":"<p>Context manager exit - handle cleanup.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>def __exit__(self, exc_type, exc_val, exc_tb):\n    \"\"\"Context manager exit - handle 
cleanup.\"\"\"\n    self.flush()\n\n    # Delete empty labels file if nothing was written\n    if not self.labels_writer.has_written:\n        if os.path.exists(self.labels_writer.labels_path):\n            os.remove(self.labels_writer.labels_path)\n            self.logger.info(\n                \"Removed empty labels file '%s'.\", self.labels_writer.labels_path\n            )\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.ShardWriter.write_batch","title":"<code>write_batch(activations, start_idx, patch_labels=None)</code>","text":"<p>Write a batch of activations and (optionally) patch labels.</p> <p>Parameters:</p> Name Type Description Default <code>activations</code> <code>Float[Tensor, 'batch n_layers all_patches d_model']</code> <p>Batch of activations to write.</p> required <code>start_idx</code> <code>int</code> <p>Starting index for this batch.</p> required <code>patch_labels</code> <code>UInt8[Tensor, 'batch n_patches'] | None</code> <p>Optional patch labels for segmentation datasets.</p> <code>None</code> Source code in <code>src/saev/data/shards.py</code> <pre><code>def write_batch(\n    self,\n    activations: Float[Tensor, \"batch n_layers all_patches d_model\"],\n    start_idx: int,\n    patch_labels: UInt8[Tensor, \"batch n_patches\"] | None = None,\n) -&gt; None:\n    \"\"\"Write a batch of activations and (optionally) patch labels.\n\n    Args:\n        activations: Batch of activations to write.\n        start_idx: Starting index for this batch.\n        patch_labels: Optional patch labels for segmentation datasets.\n    \"\"\"\n    batch_size = len(activations)\n    end_idx = start_idx + batch_size\n\n    # Write activations (handling sharding)\n    offset = self.md.examples_per_shard * self.shard\n\n    if end_idx &gt;= offset + self.md.examples_per_shard:\n        # We have run out of space in this mmap'ed file. 
Let's fill it as much as we can.\n        n_fit = offset + self.md.examples_per_shard - start_idx\n        self.acts[start_idx - offset : start_idx - offset + n_fit] = activations[\n            :n_fit\n        ]\n        self.filled = start_idx - offset + n_fit\n\n        # Write labels for the portion that fits\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                labels_to_write = (\n                    patch_labels[:n_fit].cpu().numpy().astype(np.uint8)\n                )\n            elif not isinstance(patch_labels, np.ndarray):\n                labels_to_write = np.array(patch_labels[:n_fit], dtype=np.uint8)\n            else:\n                labels_to_write = patch_labels[:n_fit]\n\n            self.labels_writer.write_batch(labels_to_write, start_idx)\n\n        self.next_shard()\n\n        # Recursively call write_batch for remaining data\n        if n_fit &lt; batch_size:\n            self.write_batch(\n                activations[n_fit:],\n                start_idx + n_fit,\n                patch_labels[n_fit:] if patch_labels is not None else None,\n            )\n    else:\n        msg = f\"0 &lt;= {start_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        assert 0 &lt;= start_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        msg = (\n            f\"0 &lt;= {end_idx} - {offset} &lt;= {offset} + {self.md.examples_per_shard}\"\n        )\n        assert 0 &lt;= end_idx - offset &lt;= offset + self.md.examples_per_shard, msg\n        self.acts[start_idx - offset : end_idx - offset] = activations\n        self.filled = end_idx - offset\n\n        # Write labels if provided\n        if patch_labels is not None:\n            # Convert to numpy uint8 if needed\n            if isinstance(patch_labels, torch.Tensor):\n                patch_labels = patch_labels.cpu().numpy().astype(np.uint8)\n            elif not 
isinstance(patch_labels, np.ndarray):\n                patch_labels = np.array(patch_labels, dtype=np.uint8)\n\n            self.labels_writer.write_batch(patch_labels, start_idx)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.get_dataloader","title":"<code>get_dataloader(data, *, batch_size, n_workers, data_tr=None, mask_tr=None, sample_tr=None)</code>","text":"<p>Get a dataloader for a default map-style dataset.</p> <p>Parameters:</p> Name Type Description Default <code>data</code> <code>Config</code> <p>Config for the dataset.</p> required <code>batch_size</code> <code>int</code> <p>Batch size.</p> required <code>n_workers</code> <code>int</code> <p>Number of dataloader workers.</p> required <code>data_tr</code> <code>Callable | None</code> <p>Transform to be applied to each 'data' key (typically the raw data).</p> <code>None</code> <code>mask_tr</code> <code>Callable | None</code> <p>Transform to be applied to masks.</p> <code>None</code> <code>sample_tr</code> <code>Callable | None</code> <p>Transform to be applied to the entire sample dict.</p> <code>None</code> <p>Returns:</p> Type Description <code>DataLoader</code> <p>A PyTorch Dataloader that yields dictionaries with <code>'data'</code> keys containing data batches, <code>'index'</code> keys containing original dataset indices and <code>'label'</code> keys containing label batches.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef get_dataloader(\n    data: datasets.Config,\n    *,\n    batch_size: int,\n    n_workers: int,\n    data_tr: Callable | None = None,\n    mask_tr: Callable | None = None,\n    sample_tr: Callable | None = None,\n) -&gt; torch.utils.data.DataLoader:\n    \"\"\"\n    Get a dataloader for a default map-style dataset.\n\n    Args:\n        data: Config for the dataset.\n        batch_size: Batch size.\n        n_workers: Number of dataloader workers.\n        data_tr: Transform to be applied to each 'data' key (typically the 
raw data).\n        mask_tr: Transform to be applied to masks.\n        sample_tr: Transform to be applied to the entire sample dict.\n\n    Returns:\n        A PyTorch Dataloader that yields dictionaries with `'data'` keys containing data batches, `'index'` keys containing original dataset indices and `'label'` keys containing label batches.\n    \"\"\"\n    dataset = datasets.get_dataset(\n        data, data_transform=data_tr, mask_transform=mask_tr, sample_transform=sample_tr\n    )\n\n    dataloader = torch.utils.data.DataLoader(\n        dataset=dataset,\n        batch_size=batch_size,\n        drop_last=False,\n        num_workers=n_workers,\n        persistent_workers=n_workers &gt; 0,\n        shuffle=False,\n        pin_memory=False,\n    )\n    return dataloader\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.pixel_to_patch_labels","title":"<code>pixel_to_patch_labels(seg, n_patches, patch_size, pixel_agg=PixelAgg.MAJORITY, bg_label=0, max_classes=256)</code>","text":"<p>Convert pixel-level segmentation to patch-level labels using vectorized operations.</p> <p>Parameters:</p> Name Type Description Default <code>seg</code> <code>Image</code> <p>Pixel-level segmentation mask as PIL Image</p> required <code>n_patches</code> <code>int</code> <p>Total number of patches expected</p> required <code>patch_size</code> <code>int</code> <p>Size of each patch in pixels</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>How to aggregate pixel labels into patch labels</p> <code>MAJORITY</code> <code>bg_label</code> <code>int</code> <p>Background label index</p> <code>0</code> <code>max_classes</code> <code>int</code> <p>Maximum number of classes (for bincount)</p> <code>256</code> <p>Returns:</p> Type Description <code>UInt8[Tensor, ' n_patches']</code> <p>Patch labels as uint8 tensor of shape (n_patches,)</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef pixel_to_patch_labels(\n   
 seg: Image.Image,\n    n_patches: int,\n    patch_size: int,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n    bg_label: int = 0,\n    max_classes: int = 256,\n) -&gt; UInt8[Tensor, \" n_patches\"]:\n    \"\"\"\n    Convert pixel-level segmentation to patch-level labels using vectorized operations.\n\n    Args:\n        seg: Pixel-level segmentation mask as PIL Image\n        n_patches: Total number of patches expected\n        patch_size: Size of each patch in pixels\n        pixel_agg: How to aggregate pixel labels into patch labels\n        bg_label: Background label index\n        max_classes: Maximum number of classes (for bincount)\n\n    Returns:\n        Patch labels as uint8 tensor of shape (n_patches,)\n    \"\"\"\n    # Convert to torch tensor for vectorized operations\n    seg_tensor = torch.from_numpy(np.array(seg, dtype=np.uint8))\n    assert seg_tensor.ndim == 2\n\n    h, w = seg_tensor.shape\n\n    # Calculate patch grid dimensions\n    patch_grid_h = h // patch_size\n    patch_grid_w = w // patch_size\n    assert patch_grid_w * patch_grid_h == n_patches, (\n        f\"Image size {w}x{h} with patch_size {patch_size} gives {patch_grid_w}x{patch_grid_h} = {patch_grid_w * patch_grid_h} patches, expected {n_patches}\"\n    )\n\n    # Reshape into patches using einops: (n_patches, patch_size * patch_size)\n    patches = einops.rearrange(\n        seg_tensor,\n        \"(h p1) (w p2) -&gt; (h w) (p1 p2)\",\n        p1=patch_size,\n        p2=patch_size,\n        h=patch_grid_h,\n        w=patch_grid_w,\n    )\n\n    # Use vectorized bincount approach to get class counts for all patches at once\n    # counts[i, c] = number of times class c appears in patch i\n    offsets = torch.arange(n_patches, device=patches.device).unsqueeze(1) * max_classes\n    flat = (patches + offsets).reshape(-1)\n    counts = torch.bincount(flat, minlength=n_patches * max_classes).reshape(\n        n_patches, max_classes\n    )\n\n    if pixel_agg is PixelAgg.MAJORITY:\n        
# Take the most common label in each patch\n        patch_labels = counts.argmax(dim=1)\n    elif pixel_agg is PixelAgg.PREFER_FG:\n        # Take the most common non-background label, or background if all background\n        nonbg = counts.clone()\n        nonbg[:, bg_label] = 0\n        has_nonbg = nonbg.sum(dim=1) &gt; 0\n        nonbg_arg = nonbg.argmax(dim=1)\n        bg_tensor = torch.full_like(nonbg_arg, bg_label)\n        patch_labels = torch.where(has_nonbg, nonbg_arg, bg_tensor)\n    else:\n        tp.assert_never(pixel_agg)\n\n    return patch_labels.to(torch.uint8)\n</code></pre>"},{"location":"api/data/shards/#saev.data.shards.worker_fn","title":"<code>worker_fn(*, family, ckpt, content_tokens_per_example, cls_token, d_model, layers, data, batch_size, n_workers, max_tokens_per_shard, shards_root, device, pixel_agg=PixelAgg.MAJORITY)</code>","text":"<p>Parameters:</p> Name Type Description Default <code>family</code> <code>str</code> <p>Transformer family (dinov2, dinov3, clip, etc).</p> required <code>ckpt</code> <code>str</code> <p>Transformer ckpt (hf-hub:imageomics/bioclip2, etc).</p> required <code>content_tokens_per_example</code> <code>int</code> <p>Number of content tokens per example.</p> required <code>cls_token</code> <code>bool</code> <p>Whether the transformer has a [CLS] token.</p> required <code>d_model</code> <code>int</code> <p>Hidden dimension of transformer.</p> required <code>layers</code> <code>list[int]</code> <p>The layers to record activations for.</p> required <code>data</code> <code>Config</code> <p>Config for the particular (image) dataset to load.</p> required <code>batch_size</code> <code>int</code> <p>Batch size for the dataset.</p> required <code>n_workers</code> <code>int</code> <p>Number of workers for loading examples fromm the dataset.</p> required <code>max_tokens_per_shard</code> <code>int</code> <p>Maximum number of tokens per disk shard.</p> required <code>pixel_agg</code> <code>PixelAgg</code> <p>Optional method 
for aggregating segmentation label pixels.</p> <code>MAJORITY</code> <code>shards_root</code> <code>Path</code> <p>Where to save shards. Should end with 'shards'. See disk-layout.md; this is $SAEV_SCRATCH/saev/shards.</p> required <code>device</code> <code>str</code> <p>Device for doing the computation.</p> required <p>Returns:</p> Type Description <code>Path</code> <p>Path to the shards directory.</p> Source code in <code>src/saev/data/shards.py</code> <pre><code>@beartype.beartype\ndef worker_fn(\n    *,\n    family: str,\n    ckpt: str,\n    content_tokens_per_example: int,\n    cls_token: bool,\n    d_model: int,\n    layers: list[int],\n    data: datasets.Config,\n    batch_size: int,\n    n_workers: int,\n    max_tokens_per_shard: int,\n    shards_root: pathlib.Path,\n    device: str,\n    pixel_agg: PixelAgg = PixelAgg.MAJORITY,\n) -&gt; pathlib.Path:\n    \"\"\"\n    Args:\n        family: Transformer family (dinov2, dinov3, clip, etc).\n        ckpt: Transformer ckpt (hf-hub:imageomics/bioclip2, etc).\n        content_tokens_per_example: Number of content tokens per example.\n        cls_token: Whether the transformer has a [CLS] token.\n        d_model: Hidden dimension of transformer.\n        layers: The layers to record activations for.\n        data: Config for the particular (image) dataset to load.\n        batch_size: Batch size for the dataset.\n        n_workers: Number of workers for loading examples fromm the dataset.\n        max_tokens_per_shard: Maximum number of tokens per disk shard.\n        pixel_agg: Optional method for aggregating segmentation label pixels.\n        shards_root: Where to save shards. Should end with 'shards'. 
See [disk-layout.md](../../developers/disk-layout.md); this is $SAEV_SCRATCH/saev/shards.\n        device: Device for doing the computation.\n\n    Returns:\n        Path to the shards directory.\n    \"\"\"\n    from saev import helpers\n    from saev.data import models\n\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n        torch.backends.cudnn.benchmark = True\n        torch.backends.cudnn.deterministic = True\n\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n    logger = logging.getLogger(\"worker_fn\")\n\n    if device == \"cuda\" and not torch.cuda.is_available():\n        logger.warning(\"No CUDA device available, using CPU.\")\n        device = \"cpu\"\n\n    assert shards_root.name == \"shards\"\n\n    model_cls = models.load_model_cls(family)\n    model_instance = model_cls(ckpt).to(device)\n    model = RecordedTransformer(\n        model_instance, content_tokens_per_example, cls_token, layers\n    )\n\n    data_tr, sample_tr = model_cls.make_transforms(ckpt, content_tokens_per_example)\n\n    mask_tr = None\n    if datasets.is_img_seg_dataset(data):\n        # For image segmentation datasets, create a transform that converts pixels to patches\n        # Use make_resize with NEAREST interpolation for segmentation masks\n        seg_resize_tr = model_cls.make_resize(\n            ckpt, content_tokens_per_example, scale=1.0, resample=Image.NEAREST\n        )\n\n        def seg_to_patches(seg):\n            \"\"\"Transform that resizes segmentation and converts to patch labels.\"\"\"\n\n            # Convert to patch labels\n            return pixel_to_patch_labels(\n                seg_resize_tr(seg),\n                content_tokens_per_example,\n   
             patch_size=model_instance.patch_size,\n                pixel_agg=pixel_agg,\n                bg_label=data.bg_label,\n            )\n\n        mask_tr = seg_to_patches\n\n    dataloader = get_dataloader(\n        data,\n        batch_size=batch_size,\n        n_workers=n_workers,\n        data_tr=data_tr,\n        mask_tr=mask_tr,\n        sample_tr=sample_tr,\n    )\n\n    n_batches = math.ceil(data.n_examples / batch_size)\n    logger.info(\"Dumping %d batches of %d examples.\", n_batches, batch_size)\n\n    model = model.to(device)\n\n    md = Metadata(\n        family=family,\n        ckpt=ckpt,\n        layers=tuple(layers),\n        content_tokens_per_example=content_tokens_per_example,\n        cls_token=cls_token,\n        d_model=d_model,\n        n_examples=data.n_examples,\n        max_tokens_per_shard=max_tokens_per_shard,\n        data=base64.b64encode(pickle.dumps(data)).decode(\"utf8\"),\n        dataset=data.root,\n        pixel_agg=pixel_agg,\n    )\n    md.dump(shards_root)\n\n    # Use context manager for proper cleanup\n    with ShardWriter(shards_root, md) as writer:\n        i = 0\n        # Calculate and write transformer activations.\n        with torch.inference_mode():\n            for batch in helpers.progress(dataloader, total=n_batches):\n                data = batch.get(\"data\").to(device)\n                grid = batch.get(\"grid\")\n                if grid is not None:\n                    grid = grid.to(device)\n                    out, cache = model(data, grid=grid)\n                else:\n                    out, cache = model(data)\n                # cache has shape [batch size, n layers, n patches + 1, d model]\n                del out\n\n                # Write activations and labels (if present) in one call\n                patch_labels = batch.get(\"patch_labels\")\n                if patch_labels is not None:\n                    logger.debug(\n                        \"Found patch_labels in batch: shape=%s\",\n 
                       patch_labels.shape\n                        if hasattr(patch_labels, \"shape\")\n                        else \"unknown\",\n                    )\n                    # Ensure correct shape\n                    assert patch_labels.shape == (\n                        len(cache),\n                        content_tokens_per_example,\n                    )\n                else:\n                    logger.debug(f\"No patch_labels in batch. Keys: {batch.keys()}\")\n\n                writer.write_batch(cache, i, patch_labels=patch_labels)\n\n                i += len(cache)\n\n    return shards_root / md.hash\n</code></pre>"},{"location":"api/data/shuffled/","title":"saev.data.shuffled","text":""},{"location":"api/data/shuffled/#saev.data.shuffled.Config","title":"<code>Config(shards=pathlib.Path('$SAEV_SCRATCH/saev/shards/abcdefg'), tokens='content', layer=-1, batch_size=1024 * 16, drop_last=False, scale_norm=False, ignore_labels=list(), n_threads=4, buffer_size=64, min_buffer_fill=0.0, batch_timeout_s=30.0, seed=17, debug=False, log_every_s=30.0, use_tmpdir=False)</code>  <code>dataclass</code>","text":"<p>Configuration for loading shuffled activation data from disk.</p> <p>Attributes:</p> Name Type Description <code>shards</code> <code>Path</code> <p>Directory with .bin shards and a metadata.json file.</p> <code>tokens</code> <code>Literal['special', 'content', 'all']</code> <p>Which subset of tokens to use. 'special' indicates the special tokens (if any). 'content' indicates it will return content tokens. 
'all' returns all tokens.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_size","title":"<code>batch_size = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.batch_timeout_s","title":"<code>batch_timeout_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How long to wait for at least one batch.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.buffer_size","title":"<code>buffer_size = 64</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of batches to queue in the shared-memory ring buffer. Higher values add latency but improve resilience to brief stalls.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.debug","title":"<code>debug = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the dataloader process should log debug messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.drop_last","title":"<code>drop_last = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to drop the last batch if it's smaller than the others.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If provided, exclude tokens with these label values. None means no filtering. Common use: ignore_labels=[0] to exclude background.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.layer","title":"<code>layer = -1</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which transformer layer(s) to read from disk. <code>-1</code> is the default, but must be changed. 
<code>\"all\"</code> enumerates every recorded layer.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.log_every_s","title":"<code>log_every_s = 30.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How frequently the dataloader process should log (debug) performance messages.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.min_buffer_fill","title":"<code>min_buffer_fill = 0.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Fraction of the reservoir that must be populated before yielding batches.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.n_threads","title":"<code>n_threads = 4</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloading threads.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.scale_norm","title":"<code>scale_norm = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to scale norms to sqrt(D).</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.seed","title":"<code>seed = 17</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.Config.use_tmpdir","title":"<code>use_tmpdir = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>If True and $TMPDIR is set, copy shards to local storage before training to avoid Infiniband congestion.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader","title":"<code>DataLoader(cfg)</code>","text":"<p>High-throughput streaming loader that deterministically shuffles data from disk shards.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __init__(self, cfg: Config):\n    self.cfg = cfg\n\n    self.manager_proc = None\n    self.reservoir = None\n    self.stop_event = None\n    self._last_reservoir_fill: float | None = None\n    
self._logged_effective_capacity = False\n\n    self.logger = logging.getLogger(\"shuffled.DataLoader\")\n    self.ctx = mp.get_context()\n\n    if not os.path.isdir(self.cfg.shards):\n        raise RuntimeError(f\"Activations are not saved at '{self.cfg.shards}'.\")\n\n    # Copy to TMPDIR if requested, otherwise use original path\n    if self.cfg.use_tmpdir:\n        self._shards_path = _copy_shards_to_tmpdir(self.cfg.shards, self.logger)\n    else:\n        self._shards_path = self.cfg.shards\n\n    if self.cfg.scale_norm:\n        raise NotImplementedError(\"scale_norm not implemented.\")\n\n    self.metadata = shards.Metadata.load(self._shards_path)\n\n    # Validate shard files exist and are non-empty\n    shard_info = shards.ShardInfo.load(self._shards_path)\n    shard_info.validate(self._shards_path)\n\n    self._n_samples = self._calculate_n_samples()\n\n    # Check if labels.bin exists for filtering\n    self.labels_mmap = None\n    if self.cfg.ignore_labels:\n        labels_path = os.path.join(self._shards_path, \"labels.bin\")\n        if not os.path.exists(labels_path):\n            raise FileNotFoundError(\n                f\"ignore_labels filtering requested but labels.bin not found at {labels_path}\"\n            )\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.ExampleBatch","title":"<code>ExampleBatch</code>","text":"<p>               Bases: <code>TypedDict</code></p> <p>Individual example.</p>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__iter__","title":"<code>__iter__()</code>","text":"<p>Yields batches.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __iter__(self) -&gt; collections.abc.Iterator[ExampleBatch]:\n    \"\"\"Yields batches.\"\"\"\n    self._start_manager()\n    n, b = 0, 0\n\n    try:\n        while n &lt; self.n_samples:\n            need = min(self.cfg.batch_size, self.n_samples - n)\n            remaining_samples = self.n_samples - n\n            
self._wait_for_min_buffer_fill(remaining_samples)\n            if not self.err_queue.empty():\n                who, tb = self.err_queue.get_nowait()\n                raise RuntimeError(f\"{who} crashed:\\n{tb}\")\n\n            try:\n                act, meta = self.reservoir.get(\n                    need, timeout=self.cfg.batch_timeout_s\n                )\n                n += need\n                b += 1\n                example_idx, token_idx = meta.T\n                yield self.ExampleBatch(\n                    act=act, example_idx=example_idx, token_idx=token_idx\n                )\n                continue\n            except TimeoutError:\n                if self.cfg.ignore_labels:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds. This can happen when filtering out many labels.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n                else:\n                    self.logger.info(\n                        \"Did not get a batch from %d worker threads in %.1fs seconds.\",\n                        self.cfg.n_threads,\n                        self.cfg.batch_timeout_s,\n                    )\n\n            # If we don't continue, then we should check on the manager process.\n            if not self.manager_proc.is_alive():\n                raise RuntimeError(\n                    f\"Manager process died unexpectedly after {b}/{len(self)} batches.\"\n                )\n\n    finally:\n        self.shutdown()\n</code></pre>"},{"location":"api/data/shuffled/#saev.data.shuffled.DataLoader.__len__","title":"<code>__len__()</code>","text":"<p>Returns the number of batches in an epoch.</p> Source code in <code>src/saev/data/shuffled.py</code> <pre><code>def __len__(self) -&gt; int:\n    \"\"\"Returns the number of batches in an epoch.\"\"\"\n    return math.ceil(self.n_samples / 
self.cfg.batch_size)\n</code></pre>"},{"location":"api/data/siglip/","title":"saev.data.siglip","text":""},{"location":"api/data/siglip/#saev.data.siglip.Vit","title":"<code>Vit(ckpt)</code>","text":"<p>               Bases: <code>Module</code>, <code>Transformer</code></p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>def __init__(self, ckpt: str):\n    super().__init__()\n\n    if ckpt.startswith(\"hf-hub:\"):\n        clip, _ = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        clip, _ = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    self._ckpt = ckpt\n\n    model = clip.visual\n    model.proj = None\n    model.output_tokens = True  # type: ignore\n    self.model = model\n\n    assert isinstance(self.model, open_clip.timm_model.TimmModel)\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_resize","title":"<code>make_resize(ckpt, n_patches_per_img=-1, *, scale=1.0, resample=Image.LANCZOS)</code>  <code>staticmethod</code>","text":"<p>Create resize transform for visualization. Use resample=Image.NEAREST for segmentation masks.</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_resize(\n    ckpt: str,\n    n_patches_per_img: int = -1,\n    *,\n    scale: float = 1.0,\n    resample: Image.Resampling = Image.LANCZOS,\n) -&gt; Callable[[Image.Image], Image.Image]:\n    \"\"\"Create resize transform for visualization. 
Use resample=Image.NEAREST for segmentation masks.\"\"\"\n    from PIL import Image\n\n    def resize(img: Image.Image) -&gt; Image.Image:\n        # SigLIP typically uses 224x224 or 384x384 images\n        # We'll assume 224x224 for simplicity\n        resize_size_px = (int(224 * scale), int(224 * scale))\n        return img.resize(resize_size_px, resample=resample)\n\n    return resize\n</code></pre>"},{"location":"api/data/siglip/#saev.data.siglip.Vit.make_transforms","title":"<code>make_transforms(ckpt, n_patches_per_img)</code>  <code>staticmethod</code>","text":"<p>Create transforms for preprocessing: (img_transform, sample_transform | None).</p> Source code in <code>src/saev/data/siglip.py</code> <pre><code>@staticmethod\ndef make_transforms(\n    ckpt: str, n_patches_per_img: int\n) -&gt; tuple[Callable, Callable | None]:\n    \"\"\"Create transforms for preprocessing: (img_transform, sample_transform | None).\"\"\"\n    if ckpt.startswith(\"hf-hub:\"):\n        _, img_transform = open_clip.create_model_from_pretrained(\n            ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    else:\n        arch, ckpt = ckpt.split(\"/\")\n        _, img_transform = open_clip.create_model_from_pretrained(\n            arch, pretrained=ckpt, cache_dir=helpers.get_cache_dir()\n        )\n    return img_transform, None\n</code></pre>"},{"location":"api/data/transforms/","title":"saev.data.transforms","text":""},{"location":"api/data/transforms/#saev.data.transforms.conv2d_to_tokens","title":"<code>conv2d_to_tokens(x_bchw, conv)</code>","text":"<p>Conv2d then flatten spatial to L, return (B, L, D).</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef conv2d_to_tokens(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"Conv2d then flatten spatial to L, return (B, L, D).\"\"\"\n    y_bdhw = conv(x_bchw)\n    return einops.rearrange(y_bdhw, \"b d h w -&gt; 
b (h w) d\")\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.resize_to_patch_grid","title":"<code>resize_to_patch_grid(img, *, p, n, resample=Image.LANCZOS)</code>","text":"<p>Resize image to (w, h) so that:   - w % p == 0, h % p == 0   - (h/p) * (w/p) == N   - Minimizes change in aspect ratio.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@beartype.beartype\ndef resize_to_patch_grid(\n    img: Image.Image,\n    *,\n    p: int,\n    n: int,\n    resample: Image.Resampling | int = Image.LANCZOS,\n) -&gt; Image.Image:\n    \"\"\"\n    Resize image to (w, h) so that:\n      - w % p == 0, h % p == 0\n      - (h/p) * (w/p) == N\n      - Minimizes change in aspect ratio.\n    \"\"\"\n    if p &lt;= 0 or n &lt;= 0:\n        raise ValueError(\"p and n must be positive integers\")\n\n    w0, h0 = img.size\n    a0 = w0 / h0\n\n    # Find the aspect ratio closest to a0\n    best_c = 0\n    best_dist = float(\"inf\")\n    for i in range(1, int(math.sqrt(n) + 1)):\n        if n % i != 0:\n            continue\n\n        for d in (i, n // i):\n            c, r = d, n // d\n            aspect = c / r\n            dist = abs(aspect - a0)\n\n            if dist &lt; best_dist:\n                best_c = d\n                best_dist = dist\n\n    c = best_c\n    r = n // c\n    w, h = c * p, r * p\n    return img.resize((w, h), resample=resample)\n</code></pre>"},{"location":"api/data/transforms/#saev.data.transforms.unfolded_conv2d","title":"<code>unfolded_conv2d(x_bchw, conv)</code>","text":"<p>Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels. 
Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.</p> Source code in <code>src/saev/data/transforms.py</code> <pre><code>@jaxtyped(typechecker=beartype.beartype)\ndef unfolded_conv2d(\n    x_bchw: Float[Tensor, \"b c h w\"], conv: nn.Conv2d\n) -&gt; Float[Tensor, \"b n d\"]:\n    \"\"\"\n    Returns tokens shaped (B, L, D), where L = (H/k)*(W/k), D = conv.out_channels.\n    Requires: stride == kernel_size, padding == 0, groups == 1, dilation == 1.\n    \"\"\"\n    k = conv.kernel_size[0]\n\n    assert conv.kernel_size == (k, k)\n    assert conv.stride == (k, k)\n    assert conv.padding == (0, 0)\n    assert conv.groups == 1\n    assert conv.dilation == (1, 1)\n\n    *b, c, h, w = x_bchw.shape\n\n    assert h % k == 0 and w % k == 0\n\n    tokens_bnd = einops.rearrange(\n        x_bchw, \"b c (hp p1) (wp p2) -&gt; b (hp wp) (c p1 p2)\", p1=k, p2=k\n    ).contiguous()\n    w_dp = conv.weight.reshape(conv.out_channels, c * k * k)\n    tokens_bnd = tokens_bnd @ w_dp.T\n    if conv.bias is not None:\n        tokens_bnd = tokens_bnd + conv.bias[None, None, :]\n    return tokens_bnd\n</code></pre>"},{"location":"api/framework/inference/","title":"saev.framework.inference","text":"<p>Script for dumping SAE inference artifacts in a single pass over the dataset.</p> <p>Default mode writes 5 files:</p> <ol> <li>mean_values.pt</li> <li>sparsity.pt</li> <li>distributions.pt</li> <li>token_acts.npz</li> <li>metrics.json</li> </ol> <p>If save=False, only metrics.json is written.</p> <p>metrics.json is serialized from <code>saev.metrics.Metrics</code>.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config","title":"<code>Config(run=pathlib.Path('./runs/abcdefg'), data=OrderedConfig(), n_dists=25, ignore_labels=list(), force_recompute=False, save=True, device='cuda', slurm_acct='', slurm_partition='', n_hours=4.0, mem_gb=80, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for computing image 
activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.data","title":"<code>data = OrderedConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Data configuration</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which accelerator to use.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.force_recompute","title":"<code>force_recompute = False</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Force recomputation even if files exist.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.ignore_labels","title":"<code>ignore_labels = dataclasses.field(default_factory=list)</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which token labels to ignore when calculating summarized image activations.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.mem_gb","title":"<code>mem_gb = 80</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_dists","title":"<code>n_dists = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features to save distributions for.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.n_hours","title":"<code>n_hours = 4.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in 
hours.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.run","title":"<code>run = pathlib.Path('./runs/abcdefg')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Path to the sae.pt file.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.save","title":"<code>save = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to write token_acts/statistics files. If False, only metrics.json is written.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. Empty means to not use Slurm.</p>"},{"location":"api/framework/inference/#saev.framework.inference.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/inference/#saev.framework.inference.main","title":"<code>main(cfg, sweep=None)</code>","text":"<p>Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config inference.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> Source code in <code>src/saev/framework/inference.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")], sweep: pathlib.Path | None = None\n):\n    \"\"\"\n    Run SAE inference over transformer activations, optionally using a sweep file to submit many jobs at once.\n\n    Args:\n        cfg: Baseline config inference.\n        sweep: Path to .py file defining the sweep parameters.\n    \"\"\"\n\n    if sweep is not None:\n     
   sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    assert all(c.slurm_acct == cfgs[0].slurm_acct for c in cfgs)\n    cfg = cfgs[0]\n\n    if not cfg.slurm_acct:\n        for i, cfg_item in enumerate(cfgs, start=1):\n            logger.info(\"Running config %d/%d locally.\", i, len(cfgs))\n            worker_fn(cfg_item)\n        logger.info(\"Jobs done.\")\n        return 0\n\n    import submitit\n    from submitit.core.utils import UncompletedJobError\n\n    executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n    executor.update_parameters(\n        job_name=\"sae-inference\",\n        time=int(cfg.n_hours * 60),\n        partition=cfg.slurm_partition,\n        gpus_per_node=1,\n        ntasks_per_node=1,\n        mem=f\"{cfg.mem_gb}GB\",\n        stderr_to_stdout=True,\n        account=cfg.slurm_acct,\n    )\n    with executor.batch():\n        jobs = []\n        for i, cfg in enumerate(cfgs):\n            do, reason, _ = need_compute(cfg)\n            if not do:\n                continue\n\n            logger.info(reason)\n            jobs.append(executor.submit(worker_fn, cfg))\n\n    time.sleep(5.0)\n\n    for i, job in enumerate(jobs, start=1):\n        logger.info(\"Job %d/%d: %s %s\", i, len(jobs), job.job_id, job.state)\n\n    for i, job in enumerate(jobs, start=1):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", i, len(jobs))\n        except UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, i)\n\n    logger.info(\"Jobs done.\")\n    return 
0\n</code></pre>"},{"location":"api/framework/saev.framework/","title":"saev.framework","text":"<p>Submitit entrypoint modules for SAE workflows.</p> <p><code>saev.framework</code> is for script-like modules (e.g. train/inference/shards) that need importable module paths for submitit launchers. Place reusable data/model utilities outside this package.</p>"},{"location":"api/framework/shards/","title":"saev.framework.shards","text":"<p>To save lots of activations, we want to do things in parallel, with lots of slurm jobs, and save multiple files, rather than just one.</p> <p>This script handles that additional complexity.</p> <p>Conceptually, activations are either thought of as</p> <ol> <li>A single [n_imgs x n_layers x (n_patches + 1), d_model] tensor. This is a dataset</li> <li>Multiple [n_imgs_per_shard, n_layers, (n_patches + 1), d_model] tensors. This is a set of sharded activations.</li> </ol>"},{"location":"api/framework/shards/#saev.framework.shards.Config","title":"<code>Config(data=datasets.Imagenet(), shards_root=pathlib.Path('$SAEV_SCRATCH/saev/shards/'), family='clip', ckpt='ViT-L-14/openai', batch_size=1024, n_workers=8, d_model=1024, layers=(lambda: [-2])(), content_tokens_per_example=256, cls_token=True, pixel_agg=PixelAgg.MAJORITY, max_tokens_per_shard=2400000, ssl=True, device='cuda', n_hours=24.0, slurm_acct='', slurm_partition='', log_to='./logs')</code>  <code>dataclass</code>","text":"<p>Configuration for calculating and saving ViT activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.batch_size","title":"<code>batch_size = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Batch size for ViT inference.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ckpt","title":"<code>ckpt = 'ViT-L-14/openai'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Specific model 
checkpoint.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.cls_token","title":"<code>cls_token = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether the model has a [CLS] token.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.content_tokens_per_example","title":"<code>content_tokens_per_example = 256</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of content tokens per example (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Dimension of the ViT activations (depends on model).</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.data","title":"<code>data = dataclasses.field(default_factory=(datasets.Imagenet))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which dataset to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which device to use.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.family","title":"<code>family = 'clip'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which model family.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.layers","title":"<code>layers = dataclasses.field(default_factory=(lambda: [-2]))</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Which layers to save. 
By default, the second-to-last layer.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.log_to","title":"<code>log_to = './logs'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.max_tokens_per_shard","title":"<code>max_tokens_per_shard = 2400000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum number of activations per shard; 2.4M is approximately 10GB for 1024-dimensional 4-byte activations.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.n_workers","title":"<code>n_workers = 8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of dataloader workers.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.shards_root","title":"<code>shards_root = pathlib.Path('$SAEV_SCRATCH/saev/shards/')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to write shards.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/shards/#saev.framework.shards.Config.ssl","title":"<code>ssl = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to use 
SSL.</p>"},{"location":"api/framework/shards/#saev.framework.shards.cli","title":"<code>cli(cfg)</code>","text":"<p>Save ViT activations for use later on.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Configuration for activations.</p> required Source code in <code>src/saev/framework/shards.py</code> <pre><code>@beartype.beartype\ndef cli(cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")]):\n    \"\"\"\n    Save ViT activations for use later on.\n\n    Args:\n        cfg: Configuration for activations.\n    \"\"\"\n    logger = logging.getLogger(\"dump\")\n\n    if not cfg.ssl:\n        logger.warning(\"Ignoring SSL certs. Try not to do this!\")\n        # https://github.com/openai/whisper/discussions/734#discussioncomment-4491761\n        # Ideally we don't have to disable SSL but we are only downloading weights.\n        import ssl\n\n        ssl._create_default_https_context = ssl._create_unverified_context\n\n    from saev.data import shards\n\n    kwargs = dict(\n        family=cfg.family,\n        ckpt=cfg.ckpt,\n        content_tokens_per_example=cfg.content_tokens_per_example,\n        cls_token=cfg.cls_token,\n        d_model=cfg.d_model,\n        layers=cfg.layers,\n        data=cfg.data,\n        batch_size=cfg.batch_size,\n        n_workers=cfg.n_workers,\n        max_tokens_per_shard=cfg.max_tokens_per_shard,\n        shards_root=cfg.shards_root,\n        device=cfg.device,\n        pixel_agg=cfg.pixel_agg,\n    )\n\n    # Actually record activations.\n    if cfg.slurm_acct:\n        import submitit\n\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n        executor.update_parameters(\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            cpus_per_task=cfg.n_workers + 4,\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n\n        
job = executor.submit(shards.worker_fn, **kwargs)\n        logger.info(\"Running job '%s'.\", job.job_id)\n        job.result()\n\n    else:\n        shards.worker_fn(**kwargs)\n</code></pre>"},{"location":"api/framework/train/","title":"saev.framework.train","text":"<p>Trains many SAEs in parallel to amortize the cost of loading a single batch of data over many SAE training runs.</p> <p>Checklist for making sure your training doesn't suck:</p> <ul> <li>[ ] Data scaling: scale vectors so their average L2 norm is sqrt(n).</li> <li>[ ] Initialize b_e such that each feature activates 10K * d_model / (n * d_sae) of the time, which means that on average, each example activates 10K features.</li> <li>[x] Initialize b_d to 0.</li> <li>[x] Sweep learning rate and sparsity coefficients.</li> <li>[ ] Decay learning rate to 0 over the last 20% of training.</li> <li>[ ] Warmup sparsity over all of training.</li> <li>[x] Gradient clipping (clip at 1 with clip_grad_norm)</li> <li>[x] Track dead latents through training</li> </ul>"},{"location":"api/framework/train/#saev.framework.train.Config","title":"<code>Config(train_data=saev.data.ShuffledConfig(), val_data=saev.data.ShuffledConfig(), n_train=100000000, n_val=10000000, sae=nn.SparseAutoencoderConfig(), objective=nn.objectives.Matryoshka(), n_sparsity_warmup=0, optim='adam', lr=0.0004, n_lr_warmup=500, grad_clip=1.0, track=True, wandb_project='saev', tags=(), log_every=25, runs_root=pathlib.Path('$SAEV_NFS/saev/runs'), device='cuda', seed=42, slurm_acct='', slurm_partition='', n_hours=24.0, mem_gb=128, log_to=os.path.join('.', 'logs'))</code>  <code>dataclass</code>","text":"<p>Configuration for training a sparse autoencoder on a vision transformer.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.device","title":"<code>device = 'cuda'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Hardware 
device.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.grad_clip","title":"<code>grad_clip = 1.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Maximum gradient norm across all SAE parameters.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_every","title":"<code>log_every = 25</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How often to log to WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.log_to","title":"<code>log_to = os.path.join('.', 'logs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Where to log Slurm job stdout/stderr.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.lr","title":"<code>lr = 0.0004</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Learning rate.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.mem_gb","title":"<code>mem_gb = 128</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Node memory in GB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_hours","title":"<code>n_hours = 24.0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm job length in hours.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_lr_warmup","title":"<code>n_lr_warmup = 500</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of learning rate warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_sparsity_warmup","title":"<code>n_sparsity_warmup = 0</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of sparsity coefficient warmup steps.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_train","title":"<code>n_train = 100000000</code>  <code>class-attribute</code> 
<code>instance-attribute</code>","text":"<p>Number of SAE training samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.n_val","title":"<code>n_val = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of SAE evaluation samples.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.objective","title":"<code>objective = nn.objectives.Matryoshka()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE objective configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.optim","title":"<code>optim = 'adam'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Optimizer for training.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.runs_root","title":"<code>runs_root = pathlib.Path('$SAEV_NFS/saev/runs')</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Root directory for runs.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.sae","title":"<code>sae = nn.SparseAutoencoderConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>SAE configuration.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.seed","title":"<code>seed = 42</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Random seed.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_acct","title":"<code>slurm_acct = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm account string. 
Empty means to not use Slurm.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.slurm_partition","title":"<code>slurm_partition = ''</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Slurm partition.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.tags","title":"<code>tags = ()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tags to add to WandB run.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.track","title":"<code>track = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to track with WandB.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.train_data","title":"<code>train_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Training data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.val_data","title":"<code>val_data = saev.data.ShuffledConfig()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Validation data.</p>"},{"location":"api/framework/train/#saev.framework.train.Config.wandb_project","title":"<code>wandb_project = 'saev'</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>WandB project name.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics","title":"<code>EvalMetrics(l0, l1, mse, normalized_mse, sse_sae, sse_baseline, n_dead, n_almost_dead, n_dense, freqs, mean_values, almost_dead_threshold, dense_threshold)</code>  <code>dataclass</code>","text":"<p>Results of evaluating a trained SAE on a datset.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.almost_dead_threshold","title":"<code>almost_dead_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for an \"almost dead\" 
neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.dense_threshold","title":"<code>dense_threshold</code>  <code>instance-attribute</code>","text":"<p>Threshold for a dense neuron.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.freqs","title":"<code>freqs</code>  <code>instance-attribute</code>","text":"<p>How often each feature fired.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Mean L0 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Mean L1 across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mean_values","title":"<code>mean_values</code>  <code>instance-attribute</code>","text":"<p>The mean value for each feature when it did fire.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Mean MSE across all examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_almost_dead","title":"<code>n_almost_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on fewer than <code>almost_dead_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that never fired on any example.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.n_dense","title":"<code>n_dense</code>  <code>instance-attribute</code>","text":"<p>Number of neurons that fired on more than <code>dense_threshold</code> of examples.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.normalized_mse","title":"<code>normalized_mse</code>  
<code>instance-attribute</code>","text":"<p>Normalized reconstruction MSE (SAE SSE / mean-baseline SSE).</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_baseline","title":"<code>sse_baseline</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the mean baseline.</p>"},{"location":"api/framework/train/#saev.framework.train.EvalMetrics.sse_sae","title":"<code>sse_sae</code>  <code>instance-attribute</code>","text":"<p>Total reconstruction sum-squared error for the SAE.</p>"},{"location":"api/framework/train/#saev.framework.train.evaluate","title":"<code>evaluate(cfgs, saes, objectives)</code>","text":"<p>Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.</p> <p>The metrics computed are mean <code>L0</code>/<code>L1</code>/<code>MSE</code> losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  A list of <code>EvalMetrics</code> is returned, one for each SAE.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\n@torch.no_grad()\ndef evaluate(\n    cfgs: list[Config], saes: torch.nn.ModuleList, objectives: torch.nn.ModuleList\n) -&gt; list[EvalMetrics]:\n    \"\"\"\n    Evaluates SAE quality by counting dead and dense features, recording reconstruction metrics (including normalized MSE), and making histogram plots to help human qualitative comparison.\n\n    The metrics computed are mean ``L0``/``L1``/``MSE`` losses, normalized reconstruction error, the number of dead, almost dead, and dense neurons, plus per-feature firing frequencies and mean values.  
A list of `EvalMetrics` is returned, one for each SAE.\n    \"\"\"\n\n    torch.cuda.empty_cache()\n\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    saes.eval()\n    objectives.eval()\n\n    cfg = cfgs[0]\n\n    almost_dead_lim = 1e-7\n    dense_lim = 1e-2\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.val_data)\n    n_val = min(dataloader.n_samples, cfg.n_val)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, n_val)\n\n    n_fired = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    values = torch.zeros((len(cfgs), saes[0].cfg.d_sae))\n    total_l0_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_l1_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_mse_sum = torch.zeros(len(cfgs), dtype=torch.float64)\n    total_sse_sae = torch.zeros(len(cfgs), dtype=torch.float64, device=cfg.device)\n    sum_sq = torch.zeros((), dtype=torch.float64, device=cfg.device)\n    sum_vec = torch.zeros(\n        (saes[0].cfg.d_model,), dtype=torch.float64, device=cfg.device\n    )\n    n_tokens = 0\n\n    for batch in helpers.progress(dataloader, desc=\"eval\", every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        batch_size = acts_BD.shape[0]\n        acts_BD_f64 = acts_BD.to(torch.float64)\n        sum_sq += torch.sum(acts_BD_f64 * acts_BD_f64)\n        sum_vec += acts_BD_f64.sum(dim=0)\n        n_tokens += batch_size\n        for i, (sae, objective) in enumerate(zip(saes, objectives)):\n            # Objective now handles the forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            # Get f_x for metrics\n            residual = acts_BD - fwd.x_hats[:, -1, :]\n            total_sse_sae[i] += torch.sum((residual.to(torch.float64)) ** 2)\n            n_fired[i] += einops.reduce(\n                fwd.f_x &gt; 0, \"batch d_sae -&gt; d_sae\", \"sum\"\n            ).cpu()\n            values[i] += 
einops.reduce(fwd.f_x, \"batch d_sae -&gt; d_sae\", \"sum\").cpu()\n            total_l0_sum[i] += loss.l0.cpu().item() * batch_size\n            total_l1_sum[i] += loss.l1.cpu().item() * batch_size\n            total_mse_sum[i] += loss.mse.cpu().item() * batch_size\n\n    msg = \"Validation dataloader yielded zero tokens; cannot compute normalized MSE.\"\n    assert n_tokens &gt; 0, msg\n    sum_vec_sq = torch.dot(sum_vec, sum_vec)\n    sse_baseline = sum_sq - sum_vec_sq / n_tokens\n    msg = (\n        f\"Validation baseline variance non-positive: \"\n        f\"sse_baseline={sse_baseline.item():.6e}\"\n    )\n    assert sse_baseline &gt; 0, msg\n    sse_baseline_value = sse_baseline.item()\n\n    mean_values = values / n_fired\n    freqs = n_fired / n_tokens\n\n    l0 = (total_l0_sum / n_tokens).tolist()\n    l1 = (total_l1_sum / n_tokens).tolist()\n    mse = (total_mse_sum / n_tokens).tolist()\n    sse_sae = total_sse_sae.tolist()\n    normalized_mse = (total_sse_sae / sse_baseline_value).tolist()\n    sse_baseline_all = [sse_baseline_value] * len(cfgs)\n\n    n_dead = einops.reduce(freqs == 0, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n    n_almost_dead = einops.reduce(\n        freqs &lt; almost_dead_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\"\n    ).tolist()\n    n_dense = einops.reduce(freqs &gt; dense_lim, \"n_saes d_sae -&gt; n_saes\", \"sum\").tolist()\n\n    metrics = []\n    for i in range(len(cfgs)):\n        metrics.append(\n            EvalMetrics(\n                l0=l0[i],\n                l1=l1[i],\n                mse=mse[i],\n                normalized_mse=normalized_mse[i],\n                sse_sae=sse_sae[i],\n                sse_baseline=sse_baseline_all[i],\n                n_dead=n_dead[i],\n                n_almost_dead=n_almost_dead[i],\n                n_dense=n_dense[i],\n                freqs=freqs[i],\n                mean_values=mean_values[i],\n                almost_dead_threshold=almost_dead_lim,\n                
dense_threshold=dense_lim,\n            )\n        )\n\n    return metrics\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.main","title":"<code>main(cfg, sweep=None, max_parallel=None)</code>","text":"<p>Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.</p> <p>Parameters:</p> Name Type Description Default <code>cfg</code> <code>Annotated[Config, arg(name='')]</code> <p>Baseline config for training an SAE.</p> required <code>sweep</code> <code>Path | None</code> <p>Path to .py file defining the sweep parameters.</p> <code>None</code> <code>max_parallel</code> <code>int | None</code> <p>Maximum SAEs to train concurrently within a single worker.</p> <code>None</code> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef main(\n    cfg: tp.Annotated[Config, tyro.conf.arg(name=\"\")],\n    sweep: pathlib.Path | None = None,\n    max_parallel: int | None = None,\n):\n    \"\"\"\n    Train an SAE over activations, optionally running a parallel grid search over a set of hyperparameters.\n\n    Args:\n        cfg: Baseline config for training an SAE.\n        sweep: Path to .py file defining the sweep parameters.\n        max_parallel: Maximum SAEs to train concurrently within a single worker.\n    \"\"\"\n    log_format = \"[%(asctime)s] [%(levelname)s] [%(name)s] %(message)s\"\n    logging.basicConfig(level=logging.INFO, format=log_format)\n\n    import submitit\n\n    if sweep is not None:\n        sweep_dcts = configs.load_sweep(sweep)\n        if not sweep_dcts:\n            logger.error(\"No valid sweeps found in '%s'.\", sweep)\n            sys.exit(1)\n\n        cfgs, errs = configs.load_cfgs(cfg, default=Config(), sweep_dcts=sweep_dcts)\n\n        if errs:\n            for err in errs:\n                logger.warning(\"Error in config: %s\", err)\n            return\n\n    else:\n        cfgs = [cfg]\n\n    cfgs = split_cfgs(cfgs)\n    # codex resume 
019ac16a-dc07-78e3-82c7-e5c08a6c6f0c\n    if max_parallel:\n        cfgs = [\n            subgroup\n            for group in cfgs\n            for subgroup in [\n                group[start:end]\n                for start, end in helpers.batched_idx(len(group), max_parallel)\n            ]\n        ]\n\n    logger.info(\"Running %d training jobs.\", len(cfgs))\n\n    # Use the first resolved config for submitit parameters (n_hours, mem_gb, etc.) so that sweep values take effect instead of CLI defaults.\n    cfg = cfgs[0][0]\n\n    if cfg.slurm_acct:\n        executor = submitit.SlurmExecutor(folder=cfg.log_to)\n\n        executor.update_parameters(\n            job_name=\"sae-train\",\n            time=int(cfg.n_hours * 60),\n            partition=cfg.slurm_partition,\n            gpus_per_node=1,\n            ntasks_per_node=1,\n            mem=f\"{cfg.mem_gb}GB\",\n            stderr_to_stdout=True,\n            account=cfg.slurm_acct,\n        )\n    else:\n        executor = submitit.DebugExecutor(folder=cfg.log_to)\n\n    try:\n        cloudpickle.dumps(worker_fn)\n        for group in cfgs:\n            cloudpickle.dumps(group)\n    except TypeError as err:\n        raise AssertionError(f\"Failed to pickle: {err}\")\n\n    with executor.batch():\n        jobs = [executor.submit(worker_fn, group) for group in cfgs]\n\n    # Give the executor five seconds to fire the jobs off.\n    time.sleep(5.0)\n\n    # Log initial status.\n    for j, job in enumerate(jobs):\n        logger.info(\"Job %d/%d: %s %s\", j + 1, len(jobs), job.job_id, job.state)\n\n    for j, job in enumerate(jobs):\n        try:\n            job.result()\n            logger.info(\"Job %d/%d finished.\", j + 1, len(jobs))\n        except submitit.core.utils.UncompletedJobError:\n            logger.warning(\"Job %s (%d) did not finish.\", job.job_id, j)\n\n    logger.info(\"Jobs 
done.\")\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.split_cfgs","title":"<code>split_cfgs(cfgs)</code>","text":"<p>Splits configs into groups that can be parallelized.</p> <p>Parameters:</p> Name Type Description Default <code>cfgs</code> <code>list[Config]</code> <p>A list of configs from a sweep file.</p> required <p>Returns:</p> Type Description <code>list[list[Config]]</code> <p>A list of lists, where the configs in each sublist do not differ in any keys that are in <code>CANNOT_PARALLELIZE</code>. This means that each sublist is a valid \"parallel\" set of configs for <code>train</code>.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef split_cfgs(cfgs: list[Config]) -&gt; list[list[Config]]:\n    \"\"\"\n    Splits configs into groups that can be parallelized.\n\n    Arguments:\n        cfgs: A list of configs from a sweep file.\n\n    Returns:\n        A list of lists, where the configs in each sublist do not differ in any keys that are in `CANNOT_PARALLELIZE`. 
This means that each sublist is a valid \"parallel\" set of configs for `train`.\n    \"\"\"\n    groups = collections.defaultdict(list)\n    for cfg in cfgs:\n        key = _parallel_key(cfg)\n        groups[key].append(cfg)\n\n    return [\n        [\n            dataclasses.replace(\n                cfg,\n                train_data=dataclasses.replace(cfg.train_data, seed=cfg.seed),\n                val_data=dataclasses.replace(cfg.val_data, seed=cfg.seed),\n            )\n            for cfg in group\n        ]\n        for _, group in sorted(groups.items())\n    ]\n</code></pre>"},{"location":"api/framework/train/#saev.framework.train.train","title":"<code>train(cfgs)</code>","text":"<p>Explicitly declare the optimizer, schedulers, dataloader, etc outside of <code>main</code> so that all the variables are dropped from scope and can be garbage collected.</p> Source code in <code>src/saev/framework/train.py</code> <pre><code>@beartype.beartype\ndef train(\n    cfgs: list[Config],\n) -&gt; tuple[\n    torch.nn.ModuleList, torch.nn.ModuleList, saev.utils.wandb.ParallelWandbRun, int\n]:\n    \"\"\"\n    Explicitly declare the optimizer, schedulers, dataloader, etc outside of `main` so that all the variables are dropped from scope and can be garbage collected.\n    \"\"\"\n    if len(split_cfgs(cfgs)) != 1:\n        raise ValueError(\"Configs are not parallelizeable: {cfgs}.\")\n\n    logger.info(\"Parallelizing %d runs.\", len(cfgs))\n\n    cfg = cfgs[0]\n    if torch.cuda.is_available():\n        # This enables tf32 on Ampere GPUs which is only 8% slower than\n        # float16 and almost as accurate as float32\n        # This was a default in pytorch until 1.12\n        torch.backends.cuda.matmul.allow_tf32 = True\n\n    dataloader = saev.data.ShuffledDataLoader(cfg.train_data)\n    dataloader = saev.utils.scheduling.BatchLimiter(dataloader, cfg.n_train)\n\n    saes, objectives, param_groups = make_saes(\n        [(c.sae, c.objective) for c in cfgs], dataloader\n 
   )\n\n    mode = \"online\" if cfg.track else \"disabled\"\n    tags = list(cfg.tags)\n\n    # Add metadata to configs for WandB logging\n    metadata_dict = dataclasses.asdict(dataloader.metadata)\n    wandb_configs = []\n    for c in cfgs:\n        cfg_dict = dataclasses.asdict(c)\n        cfg_dict[\"train_data\"][\"metadata\"] = metadata_dict\n        wandb_configs.append(cfg_dict)\n\n    run = saev.utils.wandb.ParallelWandbRun(\n        cfg.wandb_project, wandb_configs, mode, tags\n    )\n    slurm_job_id = os.environ.get(\"SLURM_JOB_ID\")\n    if slurm_job_id:\n        run.set_summary(\"slurm_job_id\", slurm_job_id)\n\n    # Build per-SAE bundles of optimizers/param_groups/schedulers so each config's LR and warmup drive both Muon and Adam param groups for that SAE. We reshape the flat param_groups into per-SAE lists because we need to:\n    #   (a) build schedulers with that SAE's cfg\n    #   (b) step/zero only that SAE's optimizers\n    #   (c) log that SAE's LR without fishing through a mixed flat list.\n    grouped_pgs: list[list[dict[str, object]]] = []\n    optimizers: list[list[torch.optim.Optimizer]] = []\n    lr_schedulers: list[list[saev.utils.scheduling.WarmupCosine]] = []\n\n    for i, (sae, cfg, param_group) in enumerate(zip(saes, cfgs, param_groups)):\n        if cfg.optim == \"adam\":\n            opts = [torch.optim.Adam([param_group], fused=True)]\n        elif cfg.optim == \"muon\":\n            muon_params = [p for p in sae.parameters() if p.ndim == 2]\n            msg = f\"Muon optimizer requires 2D params; SAE {i} has none.\"\n            assert muon_params, msg\n            adam_params = [p for p in sae.parameters() if p.ndim != 2]\n            msg = f\"Adam optimizer requires non-2D params; SAE {i} has none.\"\n            assert adam_params, msg\n\n            opts = [\n                torch.optim.Muon(muon_params, lr=0.0),\n                torch.optim.Adam(adam_params, lr=0.0, fused=True),\n            ]\n        else:\n            
tp.assert_never(cfg.optim)\n\n        pgs = [pg for opt in opts for pg in opt.param_groups]\n        scheds = [\n            saev.utils.scheduling.WarmupCosine(\n                0.0, cfg.n_lr_warmup, cfg.lr, len(dataloader), 0.0\n            )\n            for _ in pgs\n        ]\n\n        optimizers.append(opts)\n        grouped_pgs.append(pgs)\n        lr_schedulers.append(scheds)\n\n    param_groups = grouped_pgs\n\n    saes.train()\n    saes = saes.to(cfg.device)\n    objectives.train()\n    objectives = objectives.to(cfg.device)\n\n    global_step, n_patches_seen = 0, 0\n    dl_monitor = DataloaderMonitor(dataloader)\n\n    for batch in helpers.progress(dataloader, every=cfg.log_every):\n        acts_BD = batch[\"act\"].to(cfg.device, non_blocking=True)\n        for sae in saes:\n            sae.normalize_w_dec()\n        # Forward passes and loss calculations.\n        losses = []\n        fwds = []\n        for sae, objective in zip(saes, objectives):\n            # Objective handles the SAE forward pass internally\n            loss, fwd = objective(sae, acts_BD)\n            losses.append(loss)\n            fwds.append(fwd)\n\n        n_patches_seen += len(acts_BD)\n\n        for loss in losses:\n            loss.loss.backward()\n\n        # remove parallel gradients or normalize columns?\n        for sae in saes:\n            sae.remove_parallel_grads()\n\n        # Calculate gradient norms before optimizer step\n        grad_norms = []\n        for sae, cfg in zip(saes, cfgs):\n            # Clip gradients and get the gradient norm\n            grad_norm = torch.nn.utils.clip_grad_norm_(\n                sae.parameters(), max_norm=cfg.grad_clip\n            )\n\n            grad_norms.append(grad_norm)\n\n        # Log metrics after gradient computation\n        if (global_step + 1) % cfg.log_every == 0:\n            with torch.no_grad():\n                now = time.time()\n                dl_metrics = dl_monitor.compute(now=now)\n\n                
metadata = dataloader.metadata\n                entropy_metrics = statistics.calc_batch_entropy(\n                    batch[\"example_idx\"].to(\"cpu\"),\n                    batch[\"token_idx\"].to(\"cpu\"),\n                    metadata.n_examples,\n                    metadata.content_tokens_per_example,\n                )\n                dl_metrics.update(entropy_metrics)\n\n                acts_bd_f64 = acts_BD.to(torch.float64)\n                n_batch = acts_bd_f64.shape[0]\n                msg = \"Batch is empty; cannot compute normalized MSE.\"\n                assert n_batch &gt; 0, msg\n                batch_sum_sq = torch.sum(acts_bd_f64 * acts_bd_f64)\n                batch_sum_vec = acts_bd_f64.sum(dim=0)\n                batch_baseline_sse = (\n                    batch_sum_sq - torch.dot(batch_sum_vec, batch_sum_vec) / n_batch\n                )\n                msg = f\"Batch baseline variance non-positive: sse_baseline={batch_baseline_sse.item():.6e}\"\n                assert batch_baseline_sse &gt; 0, msg\n                batch_baseline_sse_value = batch_baseline_sse.item()\n\n                metrics = []\n                for i, (loss, sae, objective, fwd) in enumerate(\n                    zip(losses, saes, objectives, fwds)\n                ):\n                    current_lr = param_groups[i][0][\"lr\"]\n                    # Explained variance: 1 - Var(x - x_hat) / Var(x)\n                    residual = acts_BD - fwd.x_hats[:, -1, :]\n                    batch_sse_sae_value = torch.sum(\n                        (residual.to(torch.float64)) ** 2\n                    ).item()\n                    normalized_mse_value = (\n                        batch_sse_sae_value / batch_baseline_sse_value\n                    )\n                    explained_var = 1 - residual.var() / acts_BD.var()\n\n                    # Dead unit percentage: fraction of units that never activate\n                    dead_pct = ((fwd.f_x.abs() &gt; 1e-12).sum(0) == 
0).float().mean()\n\n                    # Dictionary coherence: max |&lt;w_i, w_j&gt;| for i != j\n                    W = sae.W_dec  # (d_sae, d_model)\n                    # Normalize each row (each SAE feature)\n                    W_norm = W / W.norm(dim=1, keepdim=True)\n                    coherence = (W_norm @ W_norm.T).abs().triu(1).max()\n\n                    # Average decoder row L2 norm (since W_dec is d_sae x d_model)\n                    avg_w_row_norm = sae.W_dec.norm(dim=1).mean()\n\n                    metric = {\n                        **{f\"loss/{key}\": val for key, val in loss.metrics().items()},\n                        \"progress/n_patches_seen\": n_patches_seen,\n                        \"progress/learning_rate\": current_lr,\n                        \"metrics/explained_variance\": explained_var.item(),\n                        \"metrics/dead_unit_pct\": dead_pct.item(),\n                        \"metrics/dictionary_coherence\": coherence.item(),\n                        \"metrics/avg_decoder_row_norm\": avg_w_row_norm.item(),\n                        \"metrics/grad_norm\": grad_norms[i].item(),\n                        \"metrics/sse_sae\": batch_sse_sae_value,\n                        \"metrics/sse_baseline\": batch_baseline_sse_value,\n                        \"metrics/normalized_mse\": normalized_mse_value,\n                        **dl_metrics,\n                    }\n\n                    metrics.append(metric)\n                run.log(metrics, step=global_step)\n\n                logger.info(\n                    \", \".join(\n                        f\"{key}: {value:.5f}\"\n                        for key, value in losses[0].metrics().items()\n                    )\n                )\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.step()\n\n        # Update LR and sparsity coefficients.\n        for pgs, scheds in zip(param_groups, lr_schedulers):\n            for pg, sched in zip(pgs, 
scheds):\n                pg[\"lr\"] = sched.step()\n\n        # for objective, scheduler in zip(objectives, sparsity_schedulers):\n        #     objective.sparsity_coeff = scheduler.step()\n\n        for opts in optimizers:\n            for opt in opts:\n                opt.zero_grad()\n\n        global_step += 1\n\n    return saes, objectives, run, global_step\n</code></pre>"},{"location":"api/nn/modeling/","title":"saev.nn.modeling","text":"<p>Neural network architectures for sparse autoencoders.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.AuxK","title":"<code>AuxK(key='auxk', k_aux=512, alpha=1 / 32)</code>  <code>dataclass</code>","text":"<p>AuxK auxiliary reconstruction loss for dead latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK","title":"<code>BatchTopK(key='batch-top-k', top_k=32, sparsity=NoSparsity(), momentum=0.1, aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero per sample in the batch.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation","title":"<code>BatchTopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>BatchTopK activation and inference-time threshold for sparse autoencoders.</p> <p>This module implements a BatchTopK nonlinearity that enforces a fixed sparsity budget across a batch, together with an inference-time approximation that replaces the batch-coupled operation with a simple elementwise threshold.</p> <p>Training mode (model.train()):     Given pre-activation codes x with shape [batch, d_sae], the BatchTopK activation flattens the batch to shape [batch * d_sae], selects the largest (batch * top_k) entries by value, and sets all other entries to zero. 
This enforces an average of exactly <code>top_k</code> active features per example while allowing the \"activation budget\" to move between examples in the batch.</p> <pre><code>During training, we also estimate an inference threshold theta that approximates the effective cutoff induced by BatchTopK. For each batch, we compute the minimum positive activation that survives the BatchTopK mask and update an exponential moving average of this quantity. This running estimate plays the same role as BatchNorm running statistics: it is updated only in training mode and treated as fixed at inference.\n</code></pre> <p>Eval mode (model.eval()):     At inference time we do not apply a batch-coupled top-k, since that would make each example depend on the rest of the eval batch. Instead, we use the stored running threshold theta to define a JumpReLU nonlinearity:</p> <pre><code>    y = x if x &gt; theta else 0\n\napplied elementwise and independently to each example. This preserves the approximate sparsity level learned during training, but makes the layer deterministic and sample-wise independent for evaluation, probing, and downstream use.\n</code></pre> Inputs <p>x: Tensor of shape [batch, d_sae] containing pre-activation codes.</p> Outputs <p>Tensor of shape [batch, d_sae] with the same dtype and device as x, where either:     - in training mode: exactly (batch * top_k) entries are non-zero across the batch due to the BatchTopK mask, or     - in eval mode: entries are zeroed by an elementwise JumpReLU with the learned threshold theta.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: BatchTopK):\n    super().__init__()\n    self.cfg = cfg\n\n    self.register_buffer(\"threshold\", torch.tensor(0.0))\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.BatchTopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to each sample in the batch.</p> Source code in 
<code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to each sample in the batch.\n    \"\"\"\n\n    if not self.training:\n        if self.threshold &lt;= 0:\n            return torch.where(x &gt; 0, x, torch.zeros_like(x))\n\n        return torch.where(x &gt; self.threshold, x, torch.zeros_like(x))\n\n    bsz, d_sae = x.shape\n    x_flat = x.flatten()\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k * bsz, d_sae * bsz)\n    _, idxs = torch.topk(x_flat, k, sorted=False)\n    mask = torch.zeros_like(x_flat).scatter(-1, idxs, 1.0).reshape(x.shape)\n\n    x = torch.mul(mask, x)\n\n    with torch.no_grad():\n        pos = x[x &gt; 0]\n        if pos.numel() &gt;= 0:\n            self.threshold.mul_(1 - self.cfg.momentum).add_(\n                self.cfg.momentum * pos.min()\n            )\n\n    return x\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoAux","title":"<code>NoAux(key='no-aux')</code>  <code>dataclass</code>","text":"<p>No auxiliary loss (e.g., for ReLU).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.NoSparsity","title":"<code>NoSparsity(key='no-sparsity')</code>  <code>dataclass</code>","text":"<p>No explicit sparsity penalty (e.g. 
for TopK/BatchTopK where k controls sparsity).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.Relu","title":"<code>Relu(key='relu', sparsity=L1Sparsity(coeff=0.0004), aux=NoAux())</code>  <code>dataclass</code>","text":"<p>Vanilla ReLU</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec.\n    #\n    # .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs:\n    #\n    # 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values.\n    #\n    # 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. 
With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description <code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka 
cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. 
Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    \"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model 
-&gt; d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopK","title":"<code>TopK(key='top-k', top_k=32, sparsity=NoSparsity(), aux=AuxK())</code>  <code>dataclass</code>","text":""},{"location":"api/nn/modeling/#saev.nn.modeling.TopK.top_k","title":"<code>top_k = 32</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>How many values are allowed to be non-zero.</p>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation","title":"<code>TopKActivation(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Top-K activation function. 
For use as activation function of sparse encoder.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: TopK):\n    super().__init__()\n    self.cfg = cfg\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.TopKActivation.forward","title":"<code>forward(x)</code>","text":"<p>Apply top-k activation to the input tensor.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_sae\"]) -&gt; Float[Tensor, \"batch d_sae\"]:\n    \"\"\"\n    Apply top-k activation to the input tensor.\n    \"\"\"\n\n    bsz, d_sae = x.shape\n    k = min(self.cfg.top_k, d_sae)\n    _, idxs = torch.topk(x, k, dim=-1, sorted=False)\n    mask = torch.zeros_like(x).scatter(-1, idxs, 1.0)\n\n    return torch.mul(mask, x)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": 
SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/modeling/#saev.nn.modeling.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/nn/objectives/","title":"saev.nn.objectives","text":""},{"location":"api/nn/objectives/#saev.nn.objectives.Loss","title":"<code>Loss()</code>  <code>dataclass</code>","text":"<p>The loss term for an autoencoder training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Loss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka","title":"<code>Matryoshka(n_prefixes=10, dead_threshold_tokens=10000000)</code>  <code>dataclass</code>","text":"<p>Config for the Matryoshka loss for another arbitrary SAE class.</p> <p>Reference code is here: https://github.com/noanabeshima/matryoshka-saes and the original reading is https://sparselatents.com/matryoshka.html and https://arxiv.org/pdf/2503.17547</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.dead_threshold_tokens","title":"<code>dead_threshold_tokens = 10000000</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Tokens without activation before a latent is considered 
dead.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.Matryoshka.n_prefixes","title":"<code>n_prefixes = 10</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of random length prefixes to use for loss calculation.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss","title":"<code>MatryoshkaLoss(mse, sparsity, l0, l1, aux, n_dead)</code>  <code>dataclass</code>","text":"<p>               Bases: <code>Loss</code></p> <p>The composite loss terms for an training batch.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.aux","title":"<code>aux</code>  <code>instance-attribute</code>","text":"<p>Auxiliary loss term (e.g., AuxK).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l0","title":"<code>l0</code>  <code>instance-attribute</code>","text":"<p>Sum of L0 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.l1","title":"<code>l1</code>  <code>instance-attribute</code>","text":"<p>Sum of L1 magnitudes of hidden activations for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.loss","title":"<code>loss</code>  <code>property</code>","text":"<p>Total loss.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.mse","title":"<code>mse</code>  <code>instance-attribute</code>","text":"<p>Average of reconstruction loss (mean squared error) for all prefix lengths.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.n_dead","title":"<code>n_dead</code>  <code>instance-attribute</code>","text":"<p>Number of dead latents (per aux loss threshold).</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaLoss.sparsity","title":"<code>sparsity</code>  <code>instance-attribute</code>","text":"<p>Sparsity loss, typically lambda * 
L1.</p>"},{"location":"api/nn/objectives/#saev.nn.objectives.MatryoshkaObjective","title":"<code>MatryoshkaObjective(cfg)</code>","text":"<p>               Bases: <code>Objective</code></p> <p>Torch module for calculating the matryoshka loss for an SAE.</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>def __init__(self, cfg: Matryoshka):\n    super().__init__()\n    self.cfg = cfg\n    self.toks_since_active: Tensor | None = None\n</code></pre>"},{"location":"api/nn/objectives/#saev.nn.objectives.sample_prefixes","title":"<code>sample_prefixes(d_sae, n_prefixes, min_prefix_length=1, pareto_power=0.5)</code>","text":"<p>Samples prefix lengths using a Pareto distribution. Derived from \"Learning Multi-Level Features with Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)</p> <p>Parameters:</p> Name Type Description Default <code>d_sae</code> <code>int</code> <p>Total number of latent dimensions</p> required <code>n_prefixes</code> <code>int</code> <p>Number of prefixes to sample</p> required <code>min_prefix_length</code> <code>int</code> <p>Minimum length of any prefix</p> <code>1</code> <code>pareto_power</code> <code>float</code> <p>Power parameter for Pareto distribution (lower = more uniform)</p> <code>0.5</code> <p>Returns:</p> Type Description <code>Int64[Tensor, ' n_prefixes']</code> <p>torch.Tensor: Sorted prefix lengths</p> Source code in <code>src/saev/nn/objectives.py</code> <pre><code>@torch.no_grad()\n@jaxtyped(typechecker=beartype.beartype)\ndef sample_prefixes(\n    d_sae: int, n_prefixes: int, min_prefix_length: int = 1, pareto_power: float = 0.5\n) -&gt; Int64[Tensor, \" n_prefixes\"]:\n    \"\"\"\n    Samples prefix lengths using a Pareto distribution. 
Derived from \"Learning Multi-Level Features with\n    Matryoshka Sparse Autoencoders\" (https://doi.org/10.48550/arXiv.2503.17547)\n\n    Args:\n        d_sae: Total number of latent dimensions\n        n_prefixes: Number of prefixes to sample\n        min_prefix_length: Minimum length of any prefix\n        pareto_power: Power parameter for Pareto distribution (lower = more uniform)\n\n    Returns:\n        torch.Tensor: Sorted prefix lengths\n    \"\"\"\n    if n_prefixes &lt;= 1:\n        return torch.tensor([d_sae], dtype=torch.int64)\n\n    assert n_prefixes &lt;= d_sae\n\n    # Calculate probability distribution favoring shorter prefixes\n    lengths = torch.arange(1, d_sae)\n    pareto_cdf = 1 - ((min_prefix_length / lengths.float()) ** pareto_power)\n    pareto_pdf = torch.cat([pareto_cdf[:1], pareto_cdf[1:] - pareto_cdf[:-1]])\n    probability_dist = pareto_pdf / pareto_pdf.sum()\n\n    # Sample and sort prefix lengths\n    sampled_indices = torch.multinomial(\n        probability_dist, num_samples=n_prefixes - 1, replacement=False\n    )\n\n    # Convert indices to actual prefix lengths\n    prefixes = lengths[sampled_indices]\n\n    # Add n_latents as the final prefix\n    prefixes = torch.cat((prefixes.detach().clone(), torch.tensor([d_sae])))\n\n    prefixes, _ = torch.sort(prefixes, descending=False)\n\n    return prefixes.to(torch.int64)\n</code></pre>"},{"location":"api/nn/saev.nn/","title":"saev.nn","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder","title":"<code>SparseAutoencoder(cfg)</code>","text":"<p>               Bases: <code>Module</code></p> <p>Sparse auto-encoder (SAE)</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def __init__(self, cfg: SparseAutoencoderConfig):\n    super().__init__()\n\n    self.cfg = cfg\n    self.logger = logging.getLogger(\"sae\")\n\n    self.W_dec = torch.nn.Parameter(\n        torch.nn.init.kaiming_uniform_(torch.empty(cfg.d_sae, cfg.d_model))\n    )\n    self.b_dec = 
torch.nn.Parameter(torch.zeros(cfg.d_model))\n\n    self.normalize_w_dec()\n\n    # Initialize W_enc to the transpose of W_dec.\n    #\n    # .clone() is critical here. Without it, W_enc is a transposed VIEW sharing storage with W_dec, which causes two bugs:\n    #\n    # 1. load_state_dict breaks: loading W_enc overwrites W_dec (shared memory), then loading W_dec overwrites W_enc. The loaded SAE ends up with one weight being the transpose of the other instead of the independently-trained values.\n    #\n    # 2. Any code that mutates W_dec in-place (e.g. normalize_w_dec) silently mutates W_enc too. The datapoint init in train.make_saes() relied on this accident: normalize_w_dec() kept W_enc columns unit-norm via shared storage. With .clone(), make_saes() must explicitly sync W_enc after normalizing W_dec.\n    self.W_enc = torch.nn.Parameter(self.W_dec.data.T.clone())\n    self.b_enc = torch.nn.Parameter(torch.zeros(cfg.d_sae))\n\n    self.activation = get_activation(cfg.activation)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.EncodeOut","title":"<code>EncodeOut</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Outputs of encode: pre-activations and activated latents.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.Output","title":"<code>Output</code>","text":"<p>               Bases: <code>NamedTuple</code></p> <p>Full SAE forward outputs for objectives and metrics.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.decode","title":"<code>decode(f_x, *, prefixes=None)</code>","text":"<p>Decode latent features to reconstructions.</p> <p>Parameters:</p> Name Type Description Default <code>f_x</code> <code>Float[Tensor, 'batch d_sae']</code> <p>Latent features of shape (batch, d_sae)</p> required <code>prefixes</code> <code>Int64[Tensor, ' n_prefixes'] | None</code> <p>Optional tensor of prefix lengths for Matryoshka decoding.</p> <code>None</code> <p>Returns:</p> Type Description 
<code>Float[Tensor, 'batch n_prefixes d_model']</code> <p>Matryoshka reconstructions (batch, n_prefixes, d_model).</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def decode(\n    self,\n    f_x: Float[Tensor, \"batch d_sae\"],\n    *,\n    prefixes: Int64[Tensor, \" n_prefixes\"] | None = None,\n) -&gt; Float[Tensor, \"batch n_prefixes d_model\"]:\n    \"\"\"\n    Decode latent features to reconstructions.\n\n    Args:\n        f_x: Latent features of shape (batch, d_sae)\n        prefixes: Optional tensor of prefix lengths for Matryoshka decoding.\n\n    Returns:\n        Matryoshka reconstructions (batch, n_prefixes, d_model).\n    \"\"\"\n    b, d_sae = f_x.shape\n\n    # Matryoshka cumulative decode\n    device = f_x.device\n    if prefixes is None:\n        prefixes = torch.tensor([d_sae], dtype=torch.int64)\n    assert torch.all(prefixes[1:] &gt; prefixes[:-1])\n    assert 1 &lt;= int(prefixes[0]) and int(prefixes[-1]) == d_sae\n    prefixes = prefixes.to(device)\n\n    # Build blocks from prefix cuts: [0, cut1), [cut1, cut2), ...\n    block_indices = torch.cat([\n        torch.tensor([0], dtype=prefixes.dtype, device=device),\n        prefixes,\n    ])\n    blocks = list(zip(block_indices[:-1], block_indices[1:]))\n\n    # Compute block outputs\n    block_outputs = []\n    for i, (start, end) in enumerate(blocks):\n        # Each block uses its portion of f_x and W_dec\n        block_f_x = f_x[:, start:end]\n        block_W_dec = self.W_dec[start:end, :]\n\n        # Compute block output: (batch, d_sae_block) @ (d_sae_block, d_model) -&gt; (batch, d_model)\n        # Note: W_dec is (d_sae, d_model), so block_W_dec is (block_size, d_model)\n        block_output = einops.einsum(\n            block_f_x,\n            block_W_dec,\n            \"... d_sae_block, d_sae_block d_model -&gt; ... 
d_model\",\n        )\n\n        # Add bias only to the first block\n        if i == 0:\n            block_output = block_output + self.b_dec\n\n        block_outputs.append(block_output)\n\n    # Cumulative sum to get prefix reconstructions\n    x_hats = torch.cumsum(torch.stack(block_outputs, dim=-2), dim=-2)\n\n    # (sam) This is clearly wrong. Needs to be cleaned up.\n    return x_hats\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.forward","title":"<code>forward(x)</code>","text":"<p>Given x, calculates the reconstructed x_hat and the intermediate activations f_x.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>Float[Tensor, 'batch d_model']</code> <p>a batch of transformer activations.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>def forward(self, x: Float[Tensor, \"batch d_model\"]) -&gt; Output:\n    \"\"\"\n    Given x, calculates the reconstructed x_hat and the intermediate activations f_x.\n\n    Arguments:\n        x: a batch of transformer activations.\n    \"\"\"\n    enc = self.encode(x)\n    x_hats = self.decode(enc.f_x)\n\n    return self.Output(h_x=enc.h_x, f_x=enc.f_x, x_hats=x_hats)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.normalize_w_dec","title":"<code>normalize_w_dec()</code>","text":"<p>Set W_dec to unit-norm columns.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef normalize_w_dec(self):\n    \"\"\"\n    Set W_dec to unit-norm columns.\n    \"\"\"\n    if self.cfg.normalize_w_dec:\n        self.W_dec.data /= torch.norm(self.W_dec.data, dim=1, keepdim=True)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoder.remove_parallel_grads","title":"<code>remove_parallel_grads()</code>","text":"<p>Update grads so that they remove the parallel component</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@torch.no_grad()\ndef remove_parallel_grads(self):\n    
\"\"\"\n    Update grads so that they remove the parallel component\n    \"\"\"\n    if not self.cfg.remove_parallel_grads:\n        return\n\n    if self.W_dec.grad is None:\n        return\n\n    parallel_component = einops.einsum(\n        self.W_dec.grad,\n        self.W_dec.data,\n        \"d_sae d_model, d_sae d_model -&gt; d_sae\",\n    )\n\n    norm_sq = torch.sum(self.W_dec.data * self.W_dec.data, dim=1)\n    scales = torch.zeros_like(parallel_component)\n    nonzero = norm_sq &gt; 0\n    scales[nonzero] = parallel_component[nonzero] / norm_sq[nonzero]\n\n    self.W_dec.grad -= einops.einsum(\n        scales,\n        self.W_dec.data,\n        \"d_sae, d_sae d_model -&gt; d_sae d_model\",\n    )\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig","title":"<code>SparseAutoencoderConfig(d_model=1024, d_sae=1024 * 16, activation=TopK(), reinit_blend=0.8, reinit_enc_dec_tranpose=True, remove_parallel_grads=True, normalize_w_dec=True)</code>  <code>dataclass</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.activation","title":"<code>activation = TopK()</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Activation function.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_model","title":"<code>d_model = 1024</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Size of x.</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.d_sae","title":"<code>d_sae = 1024 * 16</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Number of features in SAE latent space; size of f(x).</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.normalize_w_dec","title":"<code>normalize_w_dec = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to make sure W_dec has unit norm columns. 
See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\".</p>"},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_blend","title":"<code>reinit_blend = 0.8</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.reinit_enc_dec_tranpose","title":"<code>reinit_enc_dec_tranpose = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":""},{"location":"api/nn/saev.nn/#saev.nn.SparseAutoencoderConfig.remove_parallel_grads","title":"<code>remove_parallel_grads = True</code>  <code>class-attribute</code> <code>instance-attribute</code>","text":"<p>Whether to remove gradients parallel to W_dec columns (which will be ignored because we force the columns to have unit norm). See Towards Monosemanticity; Appendix \"Advice for Training Sparse Autoencoders: Autoencoder Architecture\" for discussion by Anthropic.</p>"},{"location":"api/nn/saev.nn/#saev.nn.dump","title":"<code>dump(fpath, sae)</code>","text":"<p>Save an SAE checkpoint to disk along with configuration, using the trick from equinox.</p> <p>Parameters:</p> Name Type Description Default <code>fpath</code> <code>Path | str</code> <p>filepath to save checkpoint to.</p> required <code>sae</code> <code>SparseAutoencoder</code> <p>sparse autoencoder checkpoint to save.</p> required Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef dump(fpath: pathlib.Path | str, sae: SparseAutoencoder):\n    \"\"\"\n    Save an SAE checkpoint to disk along with configuration, using the [trick from equinox](https://docs.kidger.site/equinox/examples/serialisation).\n\n    Arguments:\n        fpath: filepath to save checkpoint to.\n        sae: sparse autoencoder checkpoint to save.\n    \"\"\"\n    # Custom serialization to handle activation object\n    cfg_dict = dataclasses.asdict(sae.cfg)\n    # Replace activation dict with 
custom format\n    activation = sae.cfg.activation\n    cfg_dict[\"activation\"] = _serialize_dataclass(activation)\n\n    header = {\n        \"schema\": SCHEMA_VERSION,\n        \"cfg\": cfg_dict,\n        \"commit\": helpers.current_git_commit() or \"unknown\",\n        \"lib\": __version__,\n    }\n\n    fpath = pathlib.Path(fpath)\n    fpath.parent.mkdir(exist_ok=True, parents=True)\n    with open(fpath, \"wb\") as fd:\n        helpers.jdump(header, fd, option=orjson.OPT_APPEND_NEWLINE)\n        torch.save(sae.state_dict(), fd)\n</code></pre>"},{"location":"api/nn/saev.nn/#saev.nn.load","title":"<code>load(fpath, *, device='cpu')</code>","text":"<p>Loads a sparse autoencoder from disk.</p> Source code in <code>src/saev/nn/modeling.py</code> <pre><code>@beartype.beartype\ndef load(fpath: pathlib.Path | str, *, device=\"cpu\") -&gt; SparseAutoencoder:\n    \"\"\"\n    Loads a sparse autoencoder from disk.\n    \"\"\"\n    with open(fpath, \"rb\") as fd:\n        header = json.loads(fd.readline())\n        buffer = io.BytesIO(fd.read())\n\n    if \"schema\" not in header:\n        # Original, pre-schema format: just raw config parameters\n        # Remove old parameters that no longer exist\n        for keyword in (\n            \"sparsity_coeff\",\n            \"ghost_grads\",\n            \"l1_coeff\",\n            \"use_ghost_grads\",\n            \"seed\",\n        ):\n            header.pop(keyword, None)\n        # Legacy format - create SparseAutoencoderConfig with Relu activation\n        header[\"d_model\"] = header.pop(\"d_vit\")\n        cfg_kwargs = _normalize_cfg_kwargs(header)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=Relu())\n    elif header[\"schema\"] == 1:\n        # Schema version 1: A cautionary tale of poor version management\n        #\n        # This schema version unfortunately has TWO incompatible formats because we made breaking changes without incrementing the schema version. 
This is exactly what schema versioning is supposed to prevent!\n        #\n        # Format 1A (original): cls field contains activation type (\"Relu\", \"TopK\", etc.)\n        # Format 1B (later): cls field is \"SparseAutoencoderConfig\" and activation is a dict\n        #\n        # The complex logic below exists to handle both formats. This should have been avoided by incrementing to schema version 2 when we changed the format.\n        #\n        # Apologies from Sam for this mess - proper schema versioning discipline would have prevented this confusing situation. Every breaking change should increment the version number!\n\n        cls_name = header.get(\"cls\", \"SparseAutoencoderConfig\")\n        cfg_dict = dict(header[\"cfg\"])\n\n        if cls_name in [\"Relu\", \"TopK\", \"BatchTopK\"]:\n            # Format 1A: Old format where cls indicates the activation type\n            activation_cls = globals()[cls_name]\n            if cls_name in [\"TopK\", \"BatchTopK\"]:\n                activation = activation_cls(top_k=cfg_dict.get(\"top_k\", 32))\n            else:\n                activation = activation_cls()\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs, activation=activation)\n        else:\n            # Format 1B: Newer format with activation as dict\n            if \"activation\" in cfg_dict:\n                activation_info = cfg_dict[\"activation\"]\n                activation = _deserialize_dataclass_payload(\n                    activation_info, allow_legacy_nested=True\n                )\n                cfg_dict[\"activation\"] = activation\n            cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n            cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] in (2, 3, 4):\n        # Schema version 2: cleaner format with activation serialization\n        cfg_dict = dict(header[\"cfg\"])\n        activation_info = cfg_dict[\"activation\"]\n        
activation = _deserialize_dataclass_payload(\n            activation_info, allow_legacy_nested=True\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    elif header[\"schema\"] == 5:\n        cfg_dict = dict(header[\"cfg\"])\n        activation = _deserialize_dataclass_payload(\n            cfg_dict[\"activation\"], allow_legacy_nested=False\n        )\n        cfg_dict[\"activation\"] = activation\n        cfg_kwargs = _normalize_cfg_kwargs(cfg_dict)\n        cfg = SparseAutoencoderConfig(**cfg_kwargs)\n    else:\n        raise ValueError(f\"Unknown schema version: {header['schema']}\")\n\n    model = SparseAutoencoder(cfg)\n    model.load_state_dict(torch.load(buffer, weights_only=True, map_location=device))\n    return model\n</code></pre>"},{"location":"api/utils/monitoring/","title":"saev.utils.monitoring","text":""},{"location":"api/utils/monitoring/#saev.utils.monitoring.DataloaderMonitor","title":"<code>DataloaderMonitor(dataloader, process_factory=None)</code>","text":"<p>Tracks IO and CPU activity for the dataloader manager process and its children.</p> <p>The monitor owns the dataloader handle and psutil processes internally, so callers simply construct it with the dataloader and then call <code>compute()</code> whenever metrics are needed.</p> Source code in <code>src/saev/utils/monitoring.py</code> <pre><code>def __init__(\n    self,\n    dataloader: object,\n    process_factory: Callable[[int], psutil.Process] | None = None,\n) -&gt; None:\n    self.dataloader = dataloader\n    self.process_factory = process_factory or psutil.Process\n    self._reset_state()\n</code></pre>"},{"location":"api/utils/saev.utils/","title":"saev.utils","text":""},{"location":"api/utils/scheduling/","title":"saev.utils.scheduling","text":""},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter","title":"<code>BatchLimiter(dataloader, 
n_samples)</code>","text":"<p>Limits the number of batches to only return <code>n_samples</code> total samples.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, dataloader: DataLoaderLike, n_samples: int):\n    self.dataloader = dataloader\n    self.n_samples = n_samples\n    self.batch_size = dataloader.batch_size\n    self.drop_last = dataloader.drop_last\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.BatchLimiter.__getattr__","title":"<code>__getattr__(name)</code>","text":"<p>Pass through attribute access to the wrapped dataloader.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __getattr__(self, name: str) -&gt; Any:\n    \"\"\"Pass through attribute access to the wrapped dataloader.\"\"\"\n    # __getattr__ is only called when the attribute wasn't found on self\n    # So we delegate to the wrapped dataloader\n    try:\n        return getattr(self.dataloader, name)\n    except AttributeError:\n        # Re-raise with more context about where the attribute was not found\n        raise AttributeError(\n            f\"'{self.__class__.__name__}' object and its wrapped dataloader have no attribute '{name}'\"\n        )\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.Warmup","title":"<code>Warmup(init, final, n_steps)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly increases from <code>init</code> to <code>final</code> over <code>n_steps</code> steps.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(self, init: float, final: float, n_steps: int):\n    self.final = final\n    self.init = init\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/scheduling/#saev.utils.scheduling.WarmupCosine","title":"<code>WarmupCosine(init, n_warmup, peak, n_steps, final)</code>","text":"<p>               Bases: <code>Scheduler</code></p> <p>Linearly 
increases from <code>init</code> to <code>peak</code> over <code>n_warmup</code> steps, then decrease down to final using cosine decay over n_steps - n_warmup.</p> Source code in <code>src/saev/utils/scheduling.py</code> <pre><code>def __init__(\n    self, init: float, n_warmup: int, peak: float, n_steps: int, final: float\n):\n    self.init = init\n    self.peak = peak\n    self.final = final\n    self.n_warmup = n_warmup\n    self.n_steps = n_steps\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/","title":"saev.utils.statistics","text":""},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator","title":"<code>PercentileEstimator(percentile, total, lr=0.001, shape=())</code>","text":"Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def __init__(\n    self,\n    percentile: float | int,\n    total: int,\n    lr: float = 1e-3,\n    shape: tuple[int, ...] = (),\n):\n    self.percentile = percentile\n    self.total = total\n    self.lr = lr\n\n    self._estimate = torch.zeros(shape)\n    self._step = 0\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.PercentileEstimator.update","title":"<code>update(x)</code>","text":"<p>Update the estimator with a new value.</p> <p>This method maintains the marker positions using the P2 algorithm rules. When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.</p> <p>Parameters:</p> Name Type Description Default <code>x</code> <code>float | Tensor</code> <p>The new value to incorporate into the estimation</p> required Source code in <code>src/saev/utils/statistics.py</code> <pre><code>def update(self, x: float | Tensor):\n    \"\"\"\n    Update the estimator with a new value.\n\n    This method maintains the marker positions using the P2 algorithm rules. 
When a new value arrives, it's placed in the appropriate position relative to existing markers, and marker positions are adjusted to maintain their desired percentile positions.\n\n    Arguments:\n        x: The new value to incorporate into the estimation\n    \"\"\"\n    self._step += 1\n\n    step_size = self.lr * (self.total - self._step) / self.total\n\n    # Is a no-op if it's already on the same device.\n    if isinstance(x, Tensor):\n        self._estimate = self._estimate.to(x.device)\n\n    self._estimate += step_size * (\n        torch.sign(x - self._estimate) + 2 * self.percentile / 100 - 1.0\n    )\n</code></pre>"},{"location":"api/utils/statistics/#saev.utils.statistics.calc_batch_entropy","title":"<code>calc_batch_entropy(example_idx, token_idx, n_examples, content_tokens_per_example)</code>","text":"<p>Compute entropy and coverage metrics for a batch of shuffled indices.</p> <p>The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.</p> Source code in <code>src/saev/utils/statistics.py</code> <pre><code>@beartype.beartype\ndef calc_batch_entropy(\n    example_idx: IndexLike,\n    token_idx: IndexLike,\n    n_examples: int,\n    content_tokens_per_example: int,\n) -&gt; dict[str, float]:\n    \"\"\"\n    Compute entropy and coverage metrics for a batch of shuffled indices.\n\n    The returned mapping includes raw entropy (natural log units), normalized entropy, and coverage ratios for both the example indices and the token indices.\n    \"\"\"\n    example_idx_t = _to_tensor(example_idx)\n    token_idx_t = _to_tensor(token_idx)\n    if n_examples &lt;= 0:\n        raise ValueError(\"n_examples must be positive.\")\n    if content_tokens_per_example &lt;= 0:\n        raise ValueError(\"content_tokens_per_example must be positive.\")\n\n    if example_idx_t.ndim != 1:\n        raise ValueError(\"example_idx must be 1D.\")\n    if token_idx_t.ndim != 1:\n  
      raise ValueError(\"token_idx must be 1D.\")\n    if example_idx_t.numel() == 0:\n        raise ValueError(\"example_idx must contain at least one element.\")\n\n    _assert_batch_dim(example_idx_t, token_idx_t)\n\n    example_metrics = _add_prefix(\n        \"loader/example\", _entropy_metrics(example_idx_t, n_examples)\n    )\n    token_metrics = _add_prefix(\n        \"loader/token\", _entropy_metrics(token_idx_t, content_tokens_per_example)\n    )\n\n    return {**example_metrics, **token_metrics}\n</code></pre>"},{"location":"api/utils/wandb/","title":"saev.utils.wandb","text":""},{"location":"api/utils/wandb/#saev.utils.wandb.ParallelWandbRun","title":"<code>ParallelWandbRun(project, cfgs, mode, tags, dir='.wandb')</code>","text":"<p>Inspired by https://community.wandb.ai/t/is-it-possible-to-log-to-multiple-runs-simultaneously/4387</p> Source code in <code>src/saev/utils/wandb.py</code> <pre><code>def __init__(\n    self,\n    project: str,\n    cfgs: list[dict[str, object]],\n    mode: str,\n    tags: list[str],\n    dir: str = \".wandb\",\n):\n    cfg, *cfgs = cfgs\n    self.project = project\n    self.cfgs = cfgs\n    self.mode = mode\n    self.tags = tags\n    self.dir = dir\n    self.summary_updates: dict[str, object] = {}\n\n    self.live_run = wandb.init(\n        project=project,\n        config=cfg,\n        mode=mode,\n        tags=tags,\n        dir=dir,\n        settings=wandb.Settings(init_timeout=300),\n    )\n\n    self.metric_queues: list[MetricQueue] = [[] for _ in self.cfgs]\n</code></pre>"},{"location":"developers/contributing/","title":"Contributing","text":""},{"location":"developers/contributing/#project-layout","title":"Project layout","text":"<pre><code>docs/\n    mkdocs.yml    # The configuration file.\n    src/\n        index.md  # The documentation homepage.\n        ...       
# Other markdown pages, images and other files.\n</code></pre>"},{"location":"developers/datapoint-init/","title":"Datapoint Initialization","text":"<p>Datapoint initialization is an SAE weight initialization strategy independently proposed by Anthropic and Pierre Peigne for improving SAE training.</p> <p>Conceptually, we initialize each decoder column to look like a real datapoint, so every latent starts with a patch of input space where it \"wins\" and gets some gradient. Here's the algorithm:</p> <ol> <li>Select \\(n\\) random data points from your training data.</li> <li>Compute the mean \\(\\mu\\) and zero-center the data: \\(x_0 = x - \\mu\\).</li> <li>Linearly blend each zero-centered datapoint with Kaiming initialization: \\(w = p \\cdot (x - \\mu) + (1 - p) \\cdot r\\) where \\(p\\) is your blend probability and \\(r\\) is a randomly sampled Kaiming initialization vector.</li> <li>Initialize \\(W_\\text{enc}\\) as a concatenation of \\(n\\) blended vectors.</li> <li>Initialize \\(W_\\text{dec}\\) as \\(W_\\text{enc}^T\\).</li> </ol> <p>Anthropic suggests \\(p = 0.8\\) for SAEs and 0.4 for \"weakly causal crosscoders\". I interpret this to mean that there is no universally appropriate \\(p\\).</p>"},{"location":"developers/disk-layout/","title":"Storage &amp; Run Manifest Spec (v1)","text":"<p>There are two main locations:</p> <ol> <li><code>$SAEV_SCRATCH/saev/shards</code>: where we store transformer activations (referred to as <code>shards_root</code> in the codebase).</li> <li><code>$SAEV_NFS/saev/runs</code>: where we store checkpoints and other computed intermediate stuff like example images, probe1d results, etc. 
(referred to as <code>runs_root</code> in the codebase).</li> </ol> <p>Visually, these are:</p> <pre><code>$SAEV_SCRATCH/saev/\n  shards/\n    &lt;shard_hash&gt;/\n      metadata.json\n      shards.json\n      acts000000.bin\n      acts000001.bin\n      ...\n      labels.bin\n</code></pre> <p>and</p> <pre><code>$SAEV_NFS/saev/\n  runs/\n    &lt;run_id&gt;/\n      checkpoint/           # output of train.py on &lt;shard_hash&gt;\n        sae.pt\n        config.json\n      links/                # Symlinks\n        train-shards        # $SCRATCH/saev/shards/&lt;shard_hash&gt;\n        train-dataset       # Whatever the original image dataset was\n        val-shards          # $SCRATCH/saev/shards/&lt;shard_hash&gt;\n        val-dataset         # Whatever the original image dataset was\n      inference/            # outputs from dump.py\n        &lt;shard_hash&gt;/\n          config.json\n          token_acts.npz\n          visuals/          # output of visuals.py\n</code></pre> <p>Each <code>$SAEV_SCRATCH/shards/&lt;shard_hash&gt;/</code> MUST include:</p> <ul> <li><code>metadata.json</code> (UTF-8, canonical spec; see <code>protocol.md</code>)</li> <li><code>shards.json</code> (UTF-8, shard index and sizes; see <code>protocol.md</code>)</li> <li><code>acts*.bin</code> (binary shards; format in <code>protocol.md</code>)</li> <li><code>labels.bin</code> (binary patch labels aligned to shards; format in <code>protocol.md</code>)</li> </ul> <p>Note</p> <p>Immutability: Files under <code>saev/shards/&lt;shard_hash&gt;/</code> MUST be treated as read-only after publication. Any change yields a new <code>shard_hash</code>.</p> <p>All CLI entrypoints should accept a single <code>--run &lt;path&gt;</code> argument. 
Every other path MUST be resolved from the run root:</p> <ul> <li>ViT activations: <code>links/shards</code> \u2192 <code>saev/shards/&lt;shard_hash&gt;</code></li> <li>Dataset: <code>links/dataset</code> \u2192 Dataset root, wherever it is on disk.</li> <li>SAE checkpoint: <code>checkpoint/sae.pt</code></li> </ul> <p>Example resolution:</p> <pre><code>run = pathlib.Path(cfg.run)\nshards_root = (run / \"links\" / \"shards\").resolve()\ndataset_root = (run / \"links\" / \"dataset\").resolve()\nckpt = run / \"checkpoint\" / \"sae.pt\"\nlabels = shards_root / \"labels.bin\"\n</code></pre> <ul> <li><code>$SAEV_SCRATCH</code> and <code>$SAEV_NFS</code> should be set for all users/processes running saev tools.</li> </ul>"},{"location":"developers/disk-layout/#faqs","title":"FAQs","text":"<ul> <li> <p>Where do patch labels live? Next to <code>acts*.bin</code> in <code>$SAEV_SCRATCH/shards/&lt;shard_hash&gt;/labels.bin</code>. Scripts discover them via <code>links/shards/labels.bin</code>.</p> </li> <li> <p>Can I put datasets directly in <code>$SAEV_SCRATCH</code>? Sure, but not in <code>$SAEV_SCRATCH/shards</code>.</p> </li> </ul>"},{"location":"developers/naming/","title":"Variable Naming","text":""},{"location":"developers/protocol/","title":"saev Sharded Activation File Protocol","text":"<p>saev caches activations to disk rather than run ViT or LLM inference when training SAEs. Gemma Scope makes this decision as well (see Section 3.3.2 of https://arxiv.org/pdf/2408.05147). <code>saev.data</code> has a specific protocol to support this on OSC, a supercomputer center, and to take advantage of OSC's specific disk performance. 
</p> <p>Goal: loss-lessly persist very large Transformer (ViT or LLM) activations in a form that is:</p> <ul> <li>mem-mappable</li> <li>Parameterized solely by the experiment configuration (<code>scripts/shards.py:Config</code>)</li> <li>Referenced by a content-hash, so identical configs collide, divergent ones never do</li> <li>Can be read quickly in a random order for training, and can be read (slowly) with random-access for visuals.</li> </ul> <p>This document is the single normative source. Any divergence in code is a bug.</p>"},{"location":"developers/protocol/#1-directory-layout","title":"1. Directory layout","text":"<pre><code>&lt;dump_to&gt;/&lt;HASH&gt;/\n    metadata.json    # UTF-8 JSON, human-readable, describes data-generating config\n    shards.json      # UTF-8 JSON, human-readable, describes shards.\n    acts000000.bin   # shard 0\n    acts000001.bin   # shard 1\n    ...\n    actsNNNNNN.bin   # shard NNNNNN  (zero-padded width=6)\n    labels.bin       # patch labels (optional)\n</code></pre> <p><code>HASH</code> = <code>sha256(json.dumps(metadata, sort_keys=True, separators=(',', ':')).encode('utf-8'))</code> Guards against silent config drift.</p>"},{"location":"developers/protocol/#2-json-file-schemas","title":"2. JSON file schemas","text":""},{"location":"developers/protocol/#21-metadatajson","title":"2.1. <code>metadata.json</code>","text":"field type semantic <code>family</code> string <code>\"clip\" \\| \"siglip\" \\| \"dinov2\"</code> <code>ckpt</code> string model identifier (OpenCLIP, HF, etc.) 
<code>layers</code> int[] ViT residual\u2010block indices recorded <code>patches_per_ex</code> int example patches only (excludes CLS) <code>cls_token</code> bool <code>true</code> -&gt; patch 0 is CLS, else no CLS <code>d_model</code> int activation dimensionality <code>n_examples</code> int total examples in dataset <code>patches_per_shard</code> int logical activations per shard (see #3) <code>data</code> object opaque dataset description <code>dataset</code> string absolute path to original dataset root <code>dtype</code> string numpy dtype. Fixed <code>\"float32\"</code> for now. <code>protocol</code> string <code>\"2.1\"</code> (shards after big refactor) <p>The <code>data</code> object is <code>base64.b64encode(pickle.dumps(img_ds)).decode('utf8')</code>.</p> <p>The <code>dataset</code> field stores the absolute path to the root directory of the original image dataset, allowing runs to create symlinks back to the source images for visualization and analysis.</p>"},{"location":"developers/protocol/#22-shardsjson","title":"2.2. <code>shards.json</code>","text":"<p>A single array of <code>shard</code> objects, each of which has the following fields:</p> field type semantic name string shard filename (<code>acts000000.bin</code>). n_examples int the number of examples in the shard."},{"location":"developers/protocol/#3-shard-sizing-maths","title":"3. 
Shard sizing maths","text":"<pre><code>tokens_per_ex = patches_per_ex + (1 if cls_token else 0)\n\nexamples_per_shard = floor(patches_per_shard / (tokens_per_ex * len(layers)))\n\nshape_per_shard = (\n    examples_per_shard,\n    len(layers),\n    tokens_per_ex,\n    d_model,\n)\n</code></pre> <p><code>patches_per_shard</code> is a budget (default ~2.4 M) chosen so a shard is approximately 10 GiB for Float32 @ <code>d_model = 1024</code>.</p> <p>The last shard will have a smaller value for <code>examples_per_shard</code>; this value is documented in <code>n_examples</code> in <code>shards.json</code></p>"},{"location":"developers/protocol/#4-data-layout-and-global-indexing","title":"4. Data Layout and Global Indexing","text":"<p>The entire dataset of activations is treated as a single logical 4D tensor with the shape <code>(n_examples, len(layers), tokens_per_ex, d_model)</code>. This logical tensor is C-contiguous with axes ordered <code>[Example, Layer, Token, Dimension]</code>.</p> <p>Physically, this tensor is split along the first axis (<code>Example</code>) into multiple shards, where each shard is a single binary file. 
The number of examples in each shard is constant, except for the final shard, which may be smaller.</p> <p>To locate an arbitrary activation vector, a reader must convert a logical coordinate (<code>global_ex_idx</code>, <code>layer_value</code>, <code>token_idx</code>) into a file path and an offset within that file.</p>"},{"location":"developers/protocol/#41-definitions","title":"4.1 Definitions","text":"<p>Let the parameters from <code>metadata.json</code> be:</p> <ul> <li>L = <code>len(layers)</code></li> <li>P = <code>patches_per_ex</code></li> <li>T = <code>P + (1 if cls_token else 0)</code> (Total tokens per example)</li> <li>D = <code>d_model</code></li> <li>S = <code>n_examples</code> from <code>shards.json</code> or <code>examples_per_shard</code> from Section 3 (shard sizing).</li> </ul>"},{"location":"developers/protocol/#42-coordinate-transformations","title":"4.2 Coordinate Transformations","text":"<p>Given a logical coordinate:</p> <ul> <li><code>global_ex_idx</code>: integer, with <code>0 &lt;= global_ex_idx &lt; n_examples</code></li> <li><code>layer</code>: integer, must be an element of <code>layers</code></li> <li><code>token_idx</code>: integer, <code>0 &lt;= token_idx &lt; T</code></li> </ul> <p>The physical location is found as follows:</p> <ol> <li> <p>Identify Shard:</p> <ul> <li><code>shard_idx = global_ex_idx // S</code></li> <li><code>ex_in_shard = global_ex_idx % S</code> The target file is <code>acts{shard_idx:06d}.bin</code>.</li> </ul> </li> <li> <p>Identify Layer Index: The stored data contains a subset of the ViT's layers. The logical <code>layer_value</code> must be mapped to its index in the stored <code>layers</code> array.</p> <ul> <li><code>layer_idx = layers.index(layer)</code> A reader must raise an error if <code>layer</code> is not in <code>layers</code>.</li> </ul> </li> <li> <p>Calculate Offset: The data within a shard is a 4D tensor of shape <code>(S, L, T, D)</code>. 
The offset to the first byte of the desired activation vector <code>[ex_in_shard, layer_idx , token_idx]</code> is:</p> <ul> <li><code>offset_in_vectors = (ex_in_shard * L * T) + (layer_idx * T) + token_idx</code></li> <li><code>offset_in_bytes = offset_in_vectors * D * 4</code> (assuming 4 bytes for <code>float32</code>)</li> </ul> </li> </ol> <p>A reader can then seek to <code>offset_in_bytes</code> and read \\(D \\times 4\\) bytes to retrieve the vector.</p> <p>Alternatively, rather than calculate the offset, readers can memmap the shard, then use Numpy indexing to get the activation vector.</p>"},{"location":"developers/protocol/#43-token-axis-layout","title":"4.3 Token Axis Layout","text":"<p>The <code>token</code> axis of length \\(T\\) is ordered as follows: * If <code>cls_token</code> is <code>true</code>:     * Index <code>0</code>: [CLS] token activation     * Indices <code>1</code> to \\(P\\): Patch token activations * If <code>cls_token</code> is <code>false</code>:     * Indices <code>0</code> to \\(P-1\\): Patch token activations</p> <p>The relative order of patch tokens is preserved exactly as produced by the upstream Vision Transformer.</p>"},{"location":"developers/protocol/#5-versioning-compatibility","title":"5 Versioning &amp; compatibility","text":"<ul> <li>Major changes (shape reorder, dtype switch, new required JSON keys) increment the major protocol version number at the top of this document and must emit a breaking warning in loader code.</li> <li>Minor, backward-compatible additions (new optional JSON key) merely update this doc and the minor protocol version number.</li> </ul> <p>That's it. 
Anything else you find in code that contradicts this document, fix the code or update the spec.</p>"},{"location":"developers/workflows/","title":"Workflows","text":"<ol> <li>Generate inference activations (and thus visuals) for both training and validation splits.</li> </ol>"},{"location":"users/bird-mae-debugging/","title":"Debugging Bird-MAE Activations","text":"<p>This is an example of the kind of debugging you might have to do when training SAEs on a new model. The short version: Bird-MAE has an \"emergent outlier feature\" in dimension 296 that blows up after the first MLP. The fix is to record activations after the pre-MLP LayerNorm (<code>block.norm2</code>) instead of the raw residual stream, because the LayerNorm learns to suppress the outlier.</p>"},{"location":"users/bird-mae-debugging/#symptom-80-dead-neurons","title":"Symptom: 80% dead neurons","text":"<p>While training TopK SAEs on BirdMAE activations taken from birdsong, ~80% of my neurons were dead from the very start of training.</p>"},{"location":"users/bird-mae-debugging/#comparing-to-known-good-activations","title":"Comparing to known-good activations","text":"<p>First, I compared activations from BirdMAE to DINOv3 activations (which I know are well-behaved). I recorded 300K content token activation vectors from layer 14/24 from DINOv3 ViT-L/16 and BirdMAE-L. Each vector has 1024 dimensions. I flattened these vectors; for each of BirdMAE and DINOv3, I have a list of 307.2M neuron activations (300K x 1024 = 307,200,000). I plotted a histogram below. Note the log scale on the y-axis.</p> <p></p> <p>I zoomed in on the left-most cluster, ignoring the right cluster. While BirdMAE is more spread out, the shapes look good enough for now.</p> <p></p>"},{"location":"users/bird-mae-debugging/#finding-the-outlier-dimension-296","title":"Finding the outlier: dimension 296","text":"<p>Looking at the right cluster, I realized that all of these values are from neuron 296 of 1024. 
Here, I colored activations based on their neuron: all BirdMAE neurons besides 296 are blue, DINOv3 is orange, and neuron 296 is red.</p> <p></p> <p>My activation matrix is \\(\\mathbb{R}^{300K \\times 1024}\\) for each dataset. In code, what I see is:</p> <pre><code>bird_acts.shape  # (300K, 1024)\nbird_acts[:, 295].min()  # 2549.54\nbird_acts[:, 295].max()  # 4625.12\n</code></pre> <p>Something is broken inside of BirdMAE.</p>"},{"location":"users/bird-mae-debugging/#tracing-the-outlier-through-the-residual-stream","title":"Tracing the outlier through the residual stream","text":"<p>Where in BirdMAE does this abnormality show up? Consider transformers as residual streams. After what layer does dimension 296/1024 blow up? See this diagram below: for a single random example from BirdMAE, we will track both the average neuron and neuron 296's value through the 24 transformer layers.</p> <p></p> <p>BirdMAE uses 256 content tokens for a single example. We take the average value of each neuron in the residual stream before each transformer block (the green \"Graph #1\" circle in the above diagram) and after the final transformer block. We plot each of the 1023 \"well-behaved\" neurons in light blue. We plot our degenerate neuron 296 in red. Note the log scale on the y-axis.</p> <p></p> <p>Our well-behaved neurons mostly stay in (-10, 10). Neuron 296 jumps straight to ~2.2K after the first residual block and is never fixed again. It's well-behaved coming out of the patch embedding before the first residual block.</p>"},{"location":"users/bird-mae-debugging/#narrowing-it-down-the-first-mlp","title":"Narrowing it down: the first MLP","text":"<p>Below is the output from the attention layers (Graph #2) in our architecture diagram.</p> <p></p> <p>Neuron 296 is mostly well-behaved; it's a little big after the second attention layer, but not insane.</p> <p></p> <p>Here, we can see that the output of the first MLP produces an abnormally high value for neuron 296. 
Why?</p> <p>Here's an architecture diagram of BirdMAE's MLPs according to the model definition on HuggingFace. Let's look at the trainable parameters in these MLPs across layers, starting from the end and working backwards.</p> <p></p> <p><code>fc2</code> has a <code>weight</code> parameter with shape (4096, 1024) and a <code>bias</code> parameter with shape (1024,). I take the L2 norm of <code>fc2.weight</code>'s columns to see if col 296/1024 is different.</p> <p></p> <p><code>fc2.weight</code> does appear to be different, and abnormally large (note the log scale). <code>fc2.bias</code> is also different, but it's not immediately obvious to me what's going on there.</p>"},{"location":"users/bird-mae-debugging/#root-cause-emergent-outlier-features","title":"Root cause: emergent outlier features","text":"<p>This is a known phenomenon in transformers called \"emergent outlier features.\" After extensive pretraining, a single dimension in the residual stream accumulates a very large magnitude. The model never needs to \"fix\" this because the pre-attention and pre-MLP <code>LayerNorm</code>s learn to suppress it: the learned multiplicative weight for dimension 296 is very small, and the bias is approximately 1. So later layers never actually \"see\" the outlier in practice.</p> <p>We verified this by inspecting <code>norm2.weight</code> across layers and confirming that the learned scale for dimension 296 is near-zero, but that analysis is not reproduced here.</p> <p>The BirdMAE authors never had to deal with this because all downstream use of the model goes through LayerNorm first.</p>"},{"location":"users/bird-mae-debugging/#fix-record-after-layernorm","title":"Fix: record after LayerNorm","text":"<p>The fix is to record activations after <code>block.norm2</code> (the pre-MLP LayerNorm) instead of from the raw residual stream. 
In <code>saev</code>, this is implemented as:</p> <pre><code>def get_residuals(self) -&gt; list[torch.nn.Module]:\n    return [block.norm2 for block in self.model.blocks]\n</code></pre> <p>After this change, the outlier is suppressed and SAE training works normally.</p>"},{"location":"users/bird-mae-debugging/#lessons","title":"Lessons","text":"<ol> <li>Compare activation distributions to a known-good model. Histogramming flattened activations from 300K tokens is cheap and can reveal outliers.</li> <li>Emergent outlier features are real. If a single dimension dominates your activation distribution, check whether it's a known artifact of pretraining before assuming your recording code is wrong.</li> <li>Record after LayerNorm, not from the raw residual stream. The residual stream can carry high-magnitude \"bookkeeping\" values that LayerNorm suppresses. Recording post-norm avoids this entirely.</li> </ol>"},{"location":"users/glossary/","title":"Glossary","text":"<p>Definitions for words used in the code and documentation.</p> <ul> <li>example: one dataset item (image, sentence, audio clip, point cloud, graph instance).</li> <li>token: one model position in the encoder\u2019s residual stream (the thing with hidden size <code>d_model</code>). Always \"token\" inside the model.</li> <li>content token: tokens derived from the raw input (image patches, wordpieces, audio windows, nodes, etc.).</li> <li>special token: tokens not directly derived from the raw input (class/summary token, [SEP], [MASK], [PAD], register tokens, etc.).</li> <li>sequence length L: total tokens per example (content + special). If variable, call it \u201cragged\u201d.</li> <li>layer: an integer index into the encoder\u2019s stack.</li> <li>activation kind (optional but useful): which stream you saved (e.g., resid_pre, resid_post, mlp_out, attn_out, qkv, head_out).</li> </ul> <p>Modality-specific vocab:</p> <ul> <li>patch (vision): a 2D content token. 
Often laid out on a grid with shape (H_patches, W_patches).</li> <li>frame/token or tube (video): content token in time \u00d7 space; often (T, H, W).</li> <li>wordpiece / subword (text): content token from a tokenizer.</li> <li>window / frame (audio): time\u2013frequency window.</li> <li>node (graph), point (point cloud).</li> </ul>"},{"location":"users/guide/","title":"Guide","text":"<p>This guide explains how to transition from the ADE20K demo to using <code>saev</code> with your own custom datasets.</p> <p>Here are the steps:</p> <ol> <li>Save ViT activations to disk</li> <li>Train SAEs on activations</li> <li>Evaluate the SAE checkpoints</li> <li>Visualize Learned Features</li> </ol> <p>Note</p> <p><code>saev</code> assumes you are running on NVIDIA GPUs. On a multi-GPU system, prefix your commands with <code>CUDA_VISIBLE_DEVICES=X</code> to run on GPU X.</p>"},{"location":"users/guide/#save-vit-activations-to-disk","title":"Save ViT Activations to Disk","text":"<p>To save activations to disk, we need to specify:</p> <ol> <li>Which model we would like to use</li> <li>Which layers we would like to save.</li> <li>Where on disk and how we would like to save activations.</li> <li>Which images we want to save activations for.</li> </ol> <p>The <code>saev/framework/shards.py</code> script does all of this for us.</p> <p>Run <code>uv run launch.py shards --help</code> to see all the configuration.</p> <p>In practice, you might run:</p> <pre><code>uv run launch.py shards \\\n  --shards-root /fs/scratch/PAS2136/samuelstevens/saev/shards \\\n  --family clip \\\n  --ckpt ViT-B-16/openai \\\n  --d-model 768 \\\n  --layers 6 7 8 9 10 11 \\\n  --content-tokens-per-example 196 \\\n  --batch-size 512 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  data:img-seg-folder \\\n  --data.root /fs/scratch/PAS2136/samuelstevens/datasets/ADEChallengeData2016/ \\\n  --data.split training\n</code></pre> <p>This will save activations for the CLIP-pretrained model ViT-B/16, 
which has a residual stream dimension of 768, and has 196 patches per image (224 / 16 = 14; 14 x 14 = 196). It will save the last 6 layers. It will write 2.4M patches per shard, and save shards to a new directory <code>/fs/scratch/PAS2136/samuelstevens/saev/shards</code>.</p> <p>Note</p> <p>A note on storage space: A ViT-B/16 on ImageNet-1K will save 1.2M images x 197 patches/layer/image x 1 layer = ~240M activations, each of which takes up 768 floats x 4 bytes/float = 3072 bytes, for a total of 723GB for the entire dataset. As you scale to larger models (ViT-L has 1024 dimensions, and 14x14-pixel patches give 224 / 14 = 16; 16 x 16 = 256 patches/layer/image), recorded activations will grow even larger.</p> <p>This script will also save a <code>metadata.json</code> file that will record the relevant metadata for these activations, which will be read by future steps. The activations will be in <code>.bin</code> files, numbered starting from 000000.</p> <p>To add your own models, see the guide to extending in <code>saev.activations</code>.</p>"},{"location":"users/guide/#train-saes-on-activations","title":"Train SAEs on Activations","text":"<p>To train an SAE, we need to specify:</p> <ol> <li>Which activations to use as input.</li> <li>SAE architectural stuff.</li> <li>Optimization-related stuff.</li> </ol> <p>The <code>train.py</code> script handles this.</p> <p>Run <code>uv run train.py --help</code> to see all the configuration.</p> <p>The most important options are:</p> <ul> <li><code>--runs-root</code>: where to store runs.</li> <li><code>--train-data</code> and <code>--val-data</code>: How to load the training and validation data. 
You probably want to specify both <code>--{train,val}-data.shards</code> (the shard directory) and <code>--{train,val}-data.layer</code> (which layer to use).</li> <li><code>sae.activation</code>: <code>sae.activation:relu</code> to use the ReLU activation.</li> </ul> <p>This is a full example:</p> <pre><code>uv run train.py \\\n  --runs-root /fs/ess/PAS2136/samuelstevens/saev/runs \\\n  --lr 4e-3 \\\n  --sae.exp-factor 16 \\\n  --sae.d-model 1024 \\\n  --tag ade20k-v0.1 \\\n  --n-train 100_000_000 \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --train-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/51567c6c \\\n  --train-data.layer 11 \\\n  --val-data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3e27794f \\\n  --val-data.layer 11 \\\n  sae.activation:relu \\\n  objective:matryoshka \\\n  --objective.sparsity-coeff 1e-3 \\\n</code></pre> <p>This will train one (1) sparse autoencoder on the data. See the section on sweeps to learn how to train multiple SAEs in parallel using one or more GPUs.</p>"},{"location":"users/guide/#loader-entropy-metrics","title":"Loader Entropy Metrics","text":"<p>The training loop logs additional loader diagnostics derived from <code>calc_batch_entropy</code> in <code>train.py</code>. Every batch contributes two entropy measurements in natural log units:</p> <ul> <li><code>loader/example_entropy</code> and <code>loader/example_entropy_normalized</code> summarize how evenly the shuffled loader samples example indices. 
Normalization divides the raw entropy by <code>ln(metadata.n_examples)</code> so perfectly uniform sampling is 1.0.</li> <li><code>loader/token_entropy</code> and <code>loader/token_entropy_normalized</code> do the same for patch indices using <code>ln(metadata.content_tokens_per_example)</code> as the normalizer.</li> <li><code>loader/example_coverage</code> and <code>loader/token_coverage</code> report the fraction of distinct example or patch indices seen in the current batch relative to their theoretical support.</li> </ul> <p>All of these metrics appear alongside the existing <code>loader/read_mb</code> counters, helping spot skewed sampling or under-covered patches mid-run.</p>"},{"location":"users/guide/#evaluation","title":"Evaluation","text":"<p>After training an SAE, you probably want to use the SAE. You can use the SAE as a regular PyTorch <code>torch.nn.Module</code> in combination with a <code>saev.data.OrderedDataLoader</code> or <code>saev.data.IndexedDataset</code>.</p> <p>However, most SAEs are evaluated with a similar set of metrics (normalized MSE, L0, etc.). The <code>saev/framework/inference.py</code> script calculates these metrics. You can run <code>uv run launch.py inference --help</code> to see all the options.</p> <p>The most important options are:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--data</code>: The options for the OrderedDataLoader. Specifically, you need to set <code>--data.shards</code> and <code>--data.layer</code>, just like for training.</li> </ul> <pre><code>uv run launch.py inference \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/z55bntm1/ \\\n  --data.shards /fs/scratch/PAS2136/samuelstevens/saev/shards/614861a0 \\\n  --data.layer 11\n</code></pre>"},{"location":"users/guide/#visualize-learned-features","title":"Visualize Learned Features","text":"<p>Now that you've trained an SAE, you probably want to look at its learned features. 
One way to visualize an individual learned feature is by picking out images that maximize the activation of that feature. We use the saved sparse <code>token_acts.npz</code> file from the previous inference step.</p> <p>Warning</p> <p>Because there are so many different ways to visualize SAE features, I moved the visualization code to <code>contrib/trait_discovery</code> (used for our preprint \"Towards Open-Ended Visual Scientific Discovery with Sparse Autoencoders\").</p> <p>The most important options:</p> <ul> <li><code>--run</code>: The path to the SAE run directory.</li> <li><code>--shards</code>: The shards directory.</li> <li><code>--latents</code>: The 0-indexed latents to save images for.</li> <li><code>--n-latents</code>: The number of randomly selected latents to save images for.</li> </ul> <p>So first, move into the <code>contrib/trait_discovery</code> directory:</p> <pre><code>cd contrib/trait_discovery\n</code></pre> <p>Then run the script that generates highlighted images:</p> <pre><code>uv run scripts/launch.py visuals \\\n  --run /fs/ess/PAS2136/samuelstevens/saev/runs/unu6dbfb \\\n  --shards /fs/scratch/PAS2136/samuelstevens/saev/shards/3802cb66 \\\n  --latents 0 1 2 3 4 5 6 7 8 9 49 56 57 125 202 \\\n  --n-latents 20 \\\n</code></pre> <p>Note</p> <p>Because of limitations in the SAE training process, not all SAE latents are equally interesting. Some latents are dead, some are dense, some only fire on two images, etc. Typically, you want neurons that fire very strongly (high value) and fairly infrequently (low frequency). You might be interested in particular, fixed latents (<code>--include-latents</code>). 
I recommend using <code>saev/interactive/metrics.py</code> with marimo to figure out good thresholds.</p>"},{"location":"users/guide/#sweeps","title":"Sweeps","text":"<p>tl;dr: basically the slow part of training SAEs is loading vit activations from disk, and since SAEs are pretty small compared to other models, you can train a bunch of different SAEs in parallel on the same data using a big GPU. That way you can sweep learning rate, lambda, etc. all on one GPU.</p>"},{"location":"users/guide/#why-parallel-sweeps","title":"Why Parallel Sweeps","text":"<p>SAE training optimizes for a unique bottleneck compared to typical ML workflows: disk I/O rather than GPU computation. When training on vision transformer activations, loading the pre-computed activation data from disk is often the slowest part of the process, not the SAE training itself.</p> <p>A single set of ImageNet activations for a vision transformer can require terabytes of storage. Reading this data repeatedly for each hyperparameter configuration would be extremely inefficient.</p>"},{"location":"users/guide/#parallelized-training-architecture","title":"Parallelized Training Architecture","text":"<p>To address this bottleneck, we implement parallel training that allows multiple SAE configurations to train simultaneously on the same data batch:</p> <pre>\nflowchart TD\n    A[Pre-computed ViT Activations] --&gt;|Slow I/O| B[Memory Buffer]\n    B --&gt;|Shared Batch| C[SAE Model 1]\n    B --&gt;|Shared Batch| D[SAE Model 2]\n    B --&gt;|Shared Batch| E[SAE Model 3]\n    B --&gt;|Shared Batch| F[...]\n</pre> <p>This approach:</p> <ul> <li>Loads each batch of activations once from disk</li> <li>Uses that same batch for multiple SAE models with different hyperparameters</li> <li>Amortizes the slow I/O cost across all models in the sweep</li> </ul>"},{"location":"users/guide/#running-a-sweep","title":"Running a Sweep","text":"<p>The <code>train</code> command accepts a <code>--sweep</code> parameter that points 
to a TOML file defining the hyperparameter grid:</p> <pre><code>uv run launch.py train --sweep configs/my_sweep.toml\n</code></pre> <p>Here's an example sweep configuration file:</p> <pre><code>[sae]\nsparsity_coeff = [1e-4, 2e-4, 3e-4]\nd_model = 768\nd_sae = [6144, 12288]\n\n[data]\nscale_mean = true\n</code></pre> <p>This would train 6 models (3 sparsity coefficients \u00d7 2 SAE widths), each sharing the same data loading operation.</p>"},{"location":"users/guide/#limitations","title":"Limitations","text":"<p>Not all parameters can be swept in parallel. Parameters that affect data loading (like <code>batch_size</code> or dataset configuration) will cause the sweep to split into separate parallel groups. The system automatically handles this division to maximize efficiency.</p>"},{"location":"users/inference/","title":"Inference","text":"<p>If you want to get started quickly, try the inference notebook in marimo or on Google Colab.</p> <p>Briefly, you need to:</p> <ol> <li>Download a checkpoint.</li> <li>Get the code.</li> <li>Load the checkpoint.</li> <li>Get activations.</li> </ol> <p>Details are below.</p>"},{"location":"users/inference/#download-a-checkpoint","title":"Download a Checkpoint","text":"<p>First, download an SAE checkpoint from the Huggingface collection.</p>"},{"location":"users/inference/#single-checkpoint-repos","title":"Single-checkpoint repos","text":"<p>Some repos (CLIP, BioCLIP, DINOv2) contain a single <code>sae.pt</code> at the root. For instance, the SAE trained on OpenAI's CLIP ViT-B/16 with ImageNet-1K activations is here.</p> <p>You can use <code>wget</code> if you want:</p> <pre><code>wget https://huggingface.co/osunlp/SAE_CLIP_24K_ViT-B-16_IN1K/resolve/main/sae.pt\n</code></pre>"},{"location":"users/inference/#multi-checkpoint-repos","title":"Multi-checkpoint repos","text":"<p>The DINOv3 repos contain multiple checkpoints organized by layer and sparsity level. 
Each repo has a <code>manifest.jsonl</code> with metadata (layer, L0, MSE) for every checkpoint, so you can pick the right one programmatically.</p> <p>Download a specific checkpoint:</p> <pre><code>from huggingface_hub import hf_hub_download\n\n# Pick a specific layer and run ID from the repo's README or manifest.jsonl\npath = hf_hub_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\", \"layer_23/lnleoyf6/sae.pt\")\n</code></pre> <p>Download all checkpoints in a repo:</p> <pre><code>from huggingface_hub import snapshot_download\n\nsnapshot_download(\"osunlp/SAE_DINOv3_ViT-L-16_IN1K\")\n</code></pre> <p>Available DINOv3 repos:</p> <ul> <li>osunlp/SAE_DINOv3_ViT-S-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-B-16_IN1K (layers 6-11)</li> <li>osunlp/SAE_DINOv3_ViT-L-16_IN1K (layers 13-23)</li> <li>osunlp/SAE_DINOv3_TopK_ViT-L-16_IN1K (layers 13-23)</li> </ul>"},{"location":"users/inference/#get-the-code","title":"Get the Code","text":"<p>The easiest way to do this is to clone the code:</p> <pre><code>git clone https://github.com/Imageomics/saev\n</code></pre> <p>You can also install the package from git if you use uv (not sure about pip or cuda):</p> <pre><code>uv add git+https://github.com/Imageomics/saev\n</code></pre> <p>Or clone it and install it as an editable package with pip, like <code>pip install -e .</code> in your virtual environment.</p> <p>Then you can do things like <code>from saev import ...</code>.</p> <p>Note</p> <p>If you struggle to get <code>saev</code> installed, open an issue on GitHub and I will figure out how to make it easier.</p>"},{"location":"users/inference/#load-the-checkpoint","title":"Load the Checkpoint","text":"<pre><code>import saev.nn\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n</code></pre> <p>Now you have a pretrained SAE.</p>"},{"location":"users/inference/#get-activations","title":"Get Activations","text":"<p>This is the hardest part. 
We need to:</p> <ol> <li>Pass an image into a ViT</li> <li>Record the dense ViT activations at the same layer that the SAE was trained on.</li> <li>Pass the activations into the SAE to get sparse activations.</li> <li>Do something interesting with the sparse SAE activations.</li> </ol> <p>There are examples of this in the demo code: for classification and semantic segmentation. If the permalinks change, you are looking for the <code>get_sae_latents()</code> functions in both files.</p> <p>Below is example code to do it using the <code>saev</code> package.</p> <pre><code>import saev.nn\nimport saev.data.models\nimport saev.data.shards\n\nsae = saev.nn.load(\"PATH_TO_YOUR_SAE_CKPT.pt\")\n\nvit_cls = saev.data.models.load_model_cls(\"clip\")\nvit = vit_cls(\"ViT-B-16/openai\").to(device)\nvit = saev.data.shards.RecordedTransformer(vit, 196, True, [10])\n\nimg_tr, _ = vit_cls.make_transforms(\"ViT-B-16/openai\", 196)\nimg = Image.open(\"example.jpg\")\n\nx = img_tr(img)\n# Add a batch dimension.\nx = x[None, ...]\n_, vit_acts = vit(x)\n# Select the only layer and ignore the CLS token.\nvit_acts = vit_acts[:, 0, 1:, :]\n\nout = sae(vit_acts)\n# out.f_x: sparse SAE latents (batch, d_sae)\n# out.x_hats: reconstructed activations (batch, n_prefixes, d_model)\n</code></pre> <p>Now you have the sparse representation of all patches in the image (<code>out.f_x</code>) and the reconstructed activations (<code>out.x_hats</code>).</p> <p>You might select the dimensions with maximal values for each patch and see what other images are maximally activating.</p>"},{"location":"users/new-project/","title":"New Project Structure","text":"<p>saev is structured like big_vision, Google's ViT codebase. To get the most use out of saev, you should not use it as a requirement in your project; rather, you should build inside of the source code of saev. 
This is a guide to that process.</p> <p>TL;DR:</p> <ol> <li>Fork saev.</li> <li>Clone your fork.</li> <li>Create a new directory in <code>contrib/</code>.</li> <li>Update both <code>src/saev</code> and your new contrib directory as necessary.</li> <li>(Hopefully) publish.</li> <li>If your changes to <code>src/saev</code> are broadly useful and not overly restrictive, open a PR with your changes to <code>src/saev</code>.</li> </ol> <p>I am currently applying SAEs to audio of birdsong, so this is how I'll develop it.</p> <p>First, fork and clone saev. Do this however you want, but GitHub has a guide on it.</p> <p>Second, you probably want to store code related to your project in this repo. Make a new directory in <code>contrib/</code>. I'm calling my new subproject \"birdsong.\"</p> <pre><code>[I] samuelstevens@host ~/p/saev (main)&gt; tree -L 1 contrib/\ncontrib/\n\u251c\u2500\u2500 birdsong\n\u251c\u2500\u2500 interactive_interp\n\u2514\u2500\u2500 trait_discovery\n</code></pre> <p>Use <code>uv</code> to make a new package inside your new project:</p> <pre><code>[I] samuelstevens@host ~/p/s/c/birdsong (main)&gt; uv init --package .\nAdding `birdsong` as member of workspace `~/projects/saev`\nInitialized project `birdsong` at `~/projects/saev/contrib/birdsong`\n</code></pre> <p>Now you have some additional files.</p> <pre><code>[I] samuelstevens@ascend-login02 ~/p/s/c/birdsong (main)&gt; tree\n.\n\u251c\u2500\u2500 pyproject.toml\n\u251c\u2500\u2500 README.md\n\u2514\u2500\u2500 src\n    \u2514\u2500\u2500 birdsong\n        \u2514\u2500\u2500 __init__.py\n</code></pre> <p>Now I can write scripts and source code for birdsong-specific stuff in here. 
I'll probably add a notebook for looking at instances of birdsongs before and after using SAEs to identify patterns under a new <code>birdsong/notebooks</code> directory, and will add <code>birdsong/logbook.md</code> to store ongoing TODO items, and so on.</p> <p>To train SAEs on audio files, I'll need to add a new dataset type to save activations. In order to do this, I'll edit <code>src/saev/data/datasets.py</code>.</p> <p>I'll also need to add another model to the dataset, one that expects audio files. Since I don't think that DINOv3, OpenCLIP, or the other existing model families will be suitable, I'll need to add a new model family. Again, this will need to go somewhere in <code>src/saev/data</code>.</p> <p>If I'm smart about it, these changes will be nice and non-destructive, and other users of saev can benefit from them. After I publish some results, to share this code with others, I'll open a PR from my fork/branch to main with the new datasets/models. But I won't open a PR with <code>birdsong</code> because that's specific to me, rather than to the library.<sup>1</sup></p> <ol> <li> <p>Technically, <code>birdsong</code> will be in saev because I'm a sort of privileged user because I'm the main developer. But other folks probably want their project-specific code attached to their GitHub page, rather than OSU-NLP's.\u00a0\u21a9</p> </li> </ol>"},{"location":"users/sweeps/","title":"Sweeps","text":"<p>Hyperparameter sweeps in <code>saev</code> train multiple SAE configurations in parallel on a single GPU, amortizing the cost of loading activation data from disk across all models. 
Furthermore, sweeps make it easy to train multiple SAEs with one command across multiple GPUs using Slurm.</p>"},{"location":"users/sweeps/#quick-start","title":"Quick Start","text":"<p>Create a Python file defining your sweep:</p> <pre><code># sweeps/my_sweep.py\n\n\ndef make_cfgs() -&gt; list[dict]:\n    cfgs = []\n\n    # Grid search over learning rate and sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"objective\": {\"sparsity_coeff\": sparsity},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Run the sweep:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23\n</code></pre> <p>This trains 9 SAEs (3 learning rates x 3 sparsity coefficients) in parallel.</p>"},{"location":"users/sweeps/#why-parallel-sweeps","title":"Why Parallel Sweeps?","text":"<p>SAE training is bottlenecked by disk I/O, not GPU computation. Loading terabytes of pre-computed ViT activations from disk is the slowest part. 
By training multiple SAE configurations on the same batch simultaneously, we amortize the I/O cost:</p> <pre><code>\u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n\u2502 ViT Activations (disk) \u2502\n\u2514\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2518\n            \u2502 (slow I/O, once per batch)\n            \u25bc\n      \u250c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n      \u2502  Batch   \u2502\n      \u2514\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2518\n            \u251c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u252c\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2510\n            \u25bc         \u25bc         \u25bc         \u25bc\n         SAE #1    SAE #2    SAE #3     ...\n        (lr=3e-4) (lr=1e-3) (lr=3e-3)\n</code></pre>"},{"location":"users/sweeps/#sweep-configuration","title":"Sweep Configuration","text":""},{"location":"users/sweeps/#python-based-sweeps","title":"Python-Based Sweeps","text":"<p>Python sweeps give you full control over config generation. 
Your sweep file must define a <code>make_cfgs()</code> function that returns a list of dicts.</p> <p>Grid search example:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for lr in [1e-4, 3e-4, 1e-3]:\n        for d_sae in [8192, 16384, 32768]:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Paired parameters (not a grid):</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # Grid over lr x sparsity\n    for lr in [3e-4, 1e-3, 3e-3]:\n        for sparsity in [4e-4, 8e-4, 1.6e-3]:\n            # Paired layers (train and val use same layer)\n            for layer in [6, 7, 8, 9, 10, 11]:\n                cfg = {\n                    \"lr\": lr,\n                    \"objective\": {\"sparsity_coeff\": sparsity},\n                    \"train_data\": {\"layer\": layer},\n                    \"val_data\": {\"layer\": layer},\n                }\n                cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This generates 54 configs (3 x 3 x 6) where each train/val pair uses the same layer, avoiding the 162 configs you'd get from a full grid (3 x 3 x 6 x 6).</p> <p>Conditional sweeps:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    for d_sae in [8192, 16384, 32768]:\n        # Use different LR for different SAE widths\n        lrs = [1e-3, 3e-3] if d_sae &lt;= 16384 else [3e-4, 1e-3]\n\n        for lr in lrs:\n            cfg = {\n                \"lr\": lr,\n                \"sae\": {\"d_sae\": d_sae},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"},{"location":"users/sweeps/#command-line-overrides","title":"Command-Line Overrides","text":"<p>Command-line arguments override sweep parameters with deep merging. 
The precedence order is: CLI &gt; Sweep &gt; Default.</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --lr 5e-4  # Overrides all LRs in the sweep\n</code></pre> <p>Override nested config fields with dotted notation:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py \\\n  --train-data.layer 23 \\\n  --val-data.layer 23 \\\n  --sae.d-sae 16384\n</code></pre> <p>Deep merging means that when you override a nested field, only that specific field is replaced\u2014other fields in the nested config are preserved from the sweep or default values.</p>"},{"location":"users/sweeps/#parallel-groups","title":"Parallel Groups","text":"<p>Not all parameters can vary within a parallel sweep. Parameters that affect data loading (like <code>train_data</code>, <code>n_train</code>, <code>device</code>) must be identical across all configs in a parallel group.</p> <p>When configs differ in these parameters, they're automatically split into separate Slurm jobs:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # These will run in 2 separate jobs\n    for layer in [6, 12]:  # Different data loading\n        for lr in [1e-4, 3e-4]:  # Can parallelize\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>This creates 2 parallel groups: - Job 1: layer=6, lr=[1e-4, 3e-4] - Job 2: layer=12, lr=[1e-4, 3e-4]</p> <p>Implementation detail</p> <p>See <code>CANNOT_PARALLELIZE</code> in <code>train.py</code> for the full list of parameters that split parallel groups. 
The <code>split_cfgs()</code> function handles grouping automatically.</p>"},{"location":"users/sweeps/#module-loading","title":"Module Loading","text":"<p>Your sweep file is executed as a Python module, so you can use imports and helper functions:</p> <pre><code>def make_cfgs():\n    cfgs = []\n\n    # You can use helper functions\n    base_layers = list(range(6, 24, 2))\n\n    for layer in base_layers:\n        for lr in [1e-4, 3e-4]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"val_data\": {\"layer\": layer, \"n_threads\": 8},\n                \"sae\": {\"d_model\": 1024, \"d_sae\": 16384},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Import mechanics</p> <p>The sweep file is loaded with <code>importlib.import_module()</code>, so it must be importable as a Python module. Place sweep files in a location where Python can find them (typically the project root or a <code>sweeps/</code> subdirectory).</p>"},{"location":"users/sweeps/#slurm-integration","title":"Slurm Integration","text":"<p>When running with <code>--slurm-acct</code>, each parallel group becomes a separate Slurm job:</p> <pre><code>uv run train.py --sweep sweeps/large.py \\\n  --slurm-acct PAS2136 \\\n  --slurm-partition nextgen \\\n  --n-hours 24\n</code></pre> <p>The system automatically: - Groups configs that can parallelize - Submits one Slurm job per group - Waits for all jobs to complete - Reports results</p>"},{"location":"users/sweeps/#seed-management","title":"Seed Management","text":"<p>Seeds are automatically incremented for each config to ensure reproducibility:</p> <pre><code># Base config has seed=42\n# Sweep generates 9 configs with seeds: 42, 43, 44, ..., 50\n</code></pre> <p>Override the base seed on the command line:</p> <pre><code>uv run train.py --sweep sweeps/my_sweep.py --seed 
100\n</code></pre>"},{"location":"users/sweeps/#examples","title":"Examples","text":"<p>Simple grid:</p> <pre><code># sweeps/simple.py\ndef make_cfgs():\n    return [\n        {\"lr\": lr, \"objective\": {\"sparsity_coeff\": sp}}\n        for lr in [1e-4, 3e-4, 1e-3]\n        for sp in [4e-4, 8e-4, 1.6e-3]\n    ]\n</code></pre> <p>Layer sweep with paired train/val:</p> <pre><code># sweeps/layers.py\ndef make_cfgs():\n    cfgs = []\n\n    for layer in range(6, 24, 2):  # Layers 6, 8, 10, ..., 22\n        for lr in [3e-4, 1e-3]:\n            cfg = {\n                \"lr\": lr,\n                \"train_data\": {\"layer\": layer},\n                \"val_data\": {\"layer\": layer},\n            }\n            cfgs.append(cfg)\n\n    return cfgs\n</code></pre> <p>Architecture sweep:</p> <pre><code># sweeps/architecture.py\ndef make_cfgs():\n    cfgs = []\n\n    architectures = [\n        (\"small\", 8192, 1e-3),\n        (\"medium\", 16384, 5e-4),\n        (\"large\", 32768, 3e-4),\n    ]\n\n    for name, d_sae, lr in architectures:\n        cfg = {\n            \"lr\": lr,\n            \"sae\": {\"d_sae\": d_sae},\n            \"tag\": name,\n        }\n        cfgs.append(cfg)\n\n    return cfgs\n</code></pre>"}]}
\ No newline at end of file
diff --git a/docs/api/sitemap.xml b/docs/api/sitemap.xml
index d9f457c..0f1c9aa 100644
--- a/docs/api/sitemap.xml
+++ b/docs/api/sitemap.xml
@@ -2,198 +2,198 @@
 <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
     <url>
          <loc>https://imageomics.github.io/saev/api/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/colors/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/configs/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/disk/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/helpers/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/metrics/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/saev/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/summary/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/viz/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/bird_mae/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/buffers/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/clip/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/datasets/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/dinov2/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/dinov3/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/fake_clip/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/indexed/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/models/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/ordered/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/pe/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/saev.data/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/shards/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/shuffled/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/siglip/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/data/transforms/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/framework/inference/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/framework/saev.framework/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/framework/shards/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/framework/train/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/nn/modeling/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/nn/objectives/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/nn/saev.nn/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/utils/monitoring/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/utils/saev.utils/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/utils/scheduling/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/utils/statistics/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/api/utils/wandb/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/developers/contributing/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/developers/datapoint-init/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/developers/disk-layout/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/developers/naming/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/developers/protocol/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/developers/workflows/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/users/bird-mae-debugging/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/users/glossary/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/users/guide/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/users/inference/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/users/new-project/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
     <url>
          <loc>https://imageomics.github.io/saev/api/users/sweeps/</loc>
-         <lastmod>2026-03-06</lastmod>
+         <lastmod>2026-06-12</lastmod>
     </url>
 </urlset>
\ No newline at end of file
diff --git a/docs/api/sitemap.xml.gz b/docs/api/sitemap.xml.gz
index 1e7c75db1d70f9824c2feeb5db6d644a0c439975..12d1a517173df19e83d1289da6c055b1920dae41 100644
GIT binary patch
literal 553
zcmV+^0@nQ>iwFn+Oe<;v|8r?{Wo=<_E_iKh0L_@ej+`(IK=1n%BkqvdskCaVnb}*v
zpgpewG2opfMsa}Meti=db@$vJ93&tK!G4PEILZ0>&Dp~Ofsy>BdTfr>0enXW{cx#%
z{rpjXt)8zB=Q&u0cu1W+U#c<2>AO<vx;BgvoSwL$FevWERAF^*RLAC9b$vLbD+}05
z+jX|oU9%KUV$kUU$YICLfN`AL21&7oMd=CES>?W;f7Kivnd|9zda934_2VgZxwq}Q
z)L&))t~TEtSqdym;CmcI5e)K8QvV2c0$D3Kh#`HJMA#~l`e)~Kc$Y+2@V96j=%`hT
z!Mr#P@*s2H3rEGq(OI0tA@U<-C;8DxS(BDi85m#0F?xN2mm(Yly@59{;v7jVSa~<4
zU?3DQ9FzP!np>K!Bu+BwV7LH`lv!*6&cQg8Fm=SBJ@*uxsA>A39jp{?X(~&3Kkd+<
z44f@~KB{FfiAT~0OL8Cdk|^HqStsr1g)yjog)ZU(EDW;wtQWP)95Jl}9C?fsgiW)v
zY=g2F<ypEB%xudTm#8Ch6yE;tEUWgWmBKKvfTisyrXwE%wh2nkLZ5JQo}LJOtTDw?
z2|&oV+Uj?jOQ7-Em0_nTP^6A*0Vp{)_m)<%pq@En<m05;X{Zgf^DqdSY3CU%F$*5^
r-6>`?Ad4Wr%HA<NtaXZ-yl#>O;T2%|k6_te4DtF0plf?|a2fyr;dBXO

literal 556
zcmV+{0@M8;iwFn+8LDXl|8r?{Wo=<_E_iKh0L_@qZk#X>fbV&V6?b5}k)pQR-Q4;F
z?Kv`dfXUh(<gp>?)7OSolY0&xB*3=J_|yD&%$%R!tT`;;8OdF$Z_Tkffa^)48!y$L
z-@oeb)${e?9KB%(hcwyorJ6#Rf41$qt_@=Vt7dM<k1h98>Tr7>Y>&;4>iTd<R~E3h
zw(D%Dzh)_%#1O~YnYkW4#km<VOtEW_T2`>MYR2~5=INimRvawp>*;uUs*jKL@hMHY
zkL}B~|JVD{%K7fekmuqMZosiLg+{&=Mwr0NP~03Ag2%oz#nvf|5Uo}Iy)ease@jP(
z3aahU>ldp)1WO!-@^P}E^af`M9Ql!wm<XMu{7TD749TMel0n_zMT)ba)9?mb0!oq$
zMugXr*WhI+Wfrl^=8<M=Nr0J@*DQl1WgjEMQ5Z)dTT1ZC@j%{|w$b2~g_Zm*?Q-dm
zkpA~T4AB-*A6+q65@PEEC%F>3!Zgn9nF-xko`kwr=mRdm<ykftcBRb|1ExcRC5M50
zc^mCGn`-u^2%LTb9gP_61Qn1&`NG1Evnr>%f+QLiFtk;BdUC;Im$-_W>Jw4U=rf@U
zHKqs?!i0RQje4gj1js2X#$dCjKs^~5G&ze8hSrji9yycbqE&4b)Ec@tj%Dq&BaMcb
u%T^WdAQ3ebr;u<+-$@*-b&8{W)GE%xE5Q7P@Y!Ar;rb5}y9l!b8vp?Qm>MYn

diff --git a/docs/api/users/guide/index.html b/docs/api/users/guide/index.html
index e6fa015..738a78a 100644
--- a/docs/api/users/guide/index.html
+++ b/docs/api/users/guide/index.html
@@ -2261,7 +2261,7 @@ <h3 id="parallelized-training-architecture">Parallelized Training Architecture<a
 </ul>
 <h3 id="running-a-sweep">Running a Sweep<a class="headerlink" href="#running-a-sweep" title="Permanent link">&para;</a></h3>
 <p>The <code>train</code> command accepts a <code>--sweep</code> parameter that points to a TOML file defining the hyperparameter grid:</p>
-<pre><code class="language-bash">uv run python -m saev train --sweep configs/my_sweep.toml
+<pre><code class="language-bash">uv run launch.py train --sweep configs/my_sweep.toml
 </code></pre>
 <p>Here's an example sweep configuration file:</p>
 <pre><code class="language-toml">[sae]
diff --git a/docs/api/users/sweeps/index.html b/docs/api/users/sweeps/index.html
index 35da9bd..10e2e85 100644
--- a/docs/api/users/sweeps/index.html
+++ b/docs/api/users/sweeps/index.html
@@ -2095,6 +2095,7 @@ <h2 id="quick-start">Quick Start<a class="headerlink" href="#quick-start" title=
 <p>Create a Python file defining your sweep:</p>
 <pre><code class="language-python"># sweeps/my_sweep.py
 
+
 def make_cfgs() -&gt; list[dict]:
     cfgs = []
 

From 8aedea499d14027b94f8c3b3b63d333e1ae0928b Mon Sep 17 00:00:00 2001
From: Matthew Thompson <thompson.4509@osu.edu>
Date: Fri, 12 Jun 2026 12:32:37 -0400
Subject: [PATCH 6/9] Format with latest ruff (0.15.17) as part of docs build

---
 contrib/birdsong/notebooks/001_explore.py     |    3 +-
 contrib/birdsong/notebooks/clips.py           |    6 +-
 .../freshwater_fish/scripts/make_gallery.py   |   59 +-
 .../interactive_interp/semseg/interactive.py  |    3 +-
 contrib/interactive_interp/semseg/training.py |    3 +-
 examples/inference.py                         | 1543 ++++++++---------
 6 files changed, 827 insertions(+), 790 deletions(-)

diff --git a/contrib/birdsong/notebooks/001_explore.py b/contrib/birdsong/notebooks/001_explore.py
index 37fed19..102b2ba 100644
--- a/contrib/birdsong/notebooks/001_explore.py
+++ b/contrib/birdsong/notebooks/001_explore.py
@@ -270,7 +270,8 @@ def _finalize_df(rows: list[dict[str, object]]):
         )
 
         df = (
-            df.unnest("config/sae", "config/train_data/metadata", separator="/")
+            df
+            .unnest("config/sae", "config/train_data/metadata", separator="/")
             .unnest("config/sae/activation", separator="/")
             .unnest(
                 "config/sae/activation/aux",
diff --git a/contrib/birdsong/notebooks/clips.py b/contrib/birdsong/notebooks/clips.py
index cd6f88e..637616a 100644
--- a/contrib/birdsong/notebooks/clips.py
+++ b/contrib/birdsong/notebooks/clips.py
@@ -92,7 +92,8 @@ def add_target(obs: pl.DataFrame, fields: list[str]) -> pl.DataFrame:
         obs = obs.with_columns([pl.col(field).fill_null("unknown") for field in fields])
 
         combos = (
-            obs.select(fields)
+            obs
+            .select(fields)
             .unique(maintain_order=True)  # first-seen ordering
             .with_columns(pl.arange(0, pl.len(), dtype=pl.Int32).alias("target"))
         )
@@ -101,7 +102,8 @@ def add_target(obs: pl.DataFrame, fields: list[str]) -> pl.DataFrame:
 
         target2fields = {
             target: tuple(rest)
-            for target, *rest in obs.unique(pl.col("target"))
+            for target, *rest in obs
+            .unique(pl.col("target"))
             .select("target", *fields)
             .iter_rows()
         }
diff --git a/contrib/freshwater_fish/scripts/make_gallery.py b/contrib/freshwater_fish/scripts/make_gallery.py
index 4a2d7fe..d77eeda 100644
--- a/contrib/freshwater_fish/scripts/make_gallery.py
+++ b/contrib/freshwater_fish/scripts/make_gallery.py
@@ -74,9 +74,7 @@ def build_features(
     """Collect features that have pre-rendered images on disk."""
     features = []
     available = {
-        int(d.name)
-        for d in images_dpath.iterdir()
-        if d.is_dir() and d.name.isdigit()
+        int(d.name) for d in images_dpath.iterdir() if d.is_dir() and d.name.isdigit()
     }
 
     for row in var_df.iter_rows(named=True):
@@ -275,15 +273,49 @@ def build_features(
 
 def main():
     parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--run", type=pathlib.Path, required=True, help="Run directory (e.g. /fs/ess/.../runs/um6hbn05)")
-    parser.add_argument("--shard", type=str, required=True, help="Shard ID (e.g. 8692dfa9)")
-    parser.add_argument("--dataset", type=pathlib.Path, default=None, help="Dataset root (segfolder with images/ dir, for label ordering)")
-    parser.add_argument("--split", type=str, default="validation", help="Dataset split name (default: validation)")
-    parser.add_argument("--hf-config", type=str, default="trait_segmentation", help="HuggingFace FishVista config name")
-    parser.add_argument("--hf-split", type=str, default="val", help="HuggingFace split name (default: val)")
-    parser.add_argument("--out", type=pathlib.Path, default=pathlib.Path("gallery.html"), help="Output HTML path")
+    parser.add_argument(
+        "--run",
+        type=pathlib.Path,
+        required=True,
+        help="Run directory (e.g. /fs/ess/.../runs/um6hbn05)",
+    )
+    parser.add_argument(
+        "--shard", type=str, required=True, help="Shard ID (e.g. 8692dfa9)"
+    )
+    parser.add_argument(
+        "--dataset",
+        type=pathlib.Path,
+        default=None,
+        help="Dataset root (segfolder with images/ dir, for label ordering)",
+    )
+    parser.add_argument(
+        "--split",
+        type=str,
+        default="validation",
+        help="Dataset split name (default: validation)",
+    )
+    parser.add_argument(
+        "--hf-config",
+        type=str,
+        default="trait_segmentation",
+        help="HuggingFace FishVista config name",
+    )
+    parser.add_argument(
+        "--hf-split",
+        type=str,
+        default="val",
+        help="HuggingFace split name (default: val)",
+    )
+    parser.add_argument(
+        "--out",
+        type=pathlib.Path,
+        default=pathlib.Path("gallery.html"),
+        help="Output HTML path",
+    )
     parser.add_argument("--quality", type=int, default=80, help="JPEG quality (0-100)")
-    parser.add_argument("--title", type=str, default="", help="Gallery subtitle/description")
+    parser.add_argument(
+        "--title", type=str, default="", help="Gallery subtitle/description"
+    )
     args = parser.parse_args()
 
     inference_dpath = args.run / "inference" / args.shard
@@ -313,7 +345,10 @@ def main():
     n_imgs = sum(len(f["images"]) for f in features)
     logger.info("Total images: %d", n_imgs)
 
-    title = args.title or f"SAE run {args.run.name}, shard {args.shard} | {len(features)} features, {n_imgs} images"
+    title = (
+        args.title
+        or f"SAE run {args.run.name}, shard {args.shard} | {len(features)} features, {n_imgs} images"
+    )
     html = HTML_TEMPLATE.replace("FEATURES_JSON", json.dumps(features))
     html = html.replace("TITLE_PLACEHOLDER", title)
     html = html.replace("RUN_ID_PLACEHOLDER", args.run.name)
diff --git a/contrib/interactive_interp/semseg/interactive.py b/contrib/interactive_interp/semseg/interactive.py
index 6a0ed31..661e34f 100644
--- a/contrib/interactive_interp/semseg/interactive.py
+++ b/contrib/interactive_interp/semseg/interactive.py
@@ -570,7 +570,8 @@ def make_upsampled_pred(
         logits_WHC: Float[Tensor, "width height classes"],
     ) -> Uint8[Tensor, "width height"]:
         return (
-            torch.nn.functional.interpolate(
+            torch.nn.functional
+            .interpolate(
                 logits_WHC.max(axis=-1).indices.view((1, 1, 16, 16)).float(),
                 scale_factor=14,
             )
diff --git a/contrib/interactive_interp/semseg/training.py b/contrib/interactive_interp/semseg/training.py
index 7b5979e..42ffe9b 100644
--- a/contrib/interactive_interp/semseg/training.py
+++ b/contrib/interactive_interp/semseg/training.py
@@ -358,7 +358,8 @@ def __getitem__(self, i: int) -> dict[str, object]:
 
         pw, ph = self.patch_size_px
         patch_labels = (
-            einops.rearrange(pixel_labels, "(w pw) (h ph) -> w h (pw ph)", pw=pw, ph=ph)
+            einops
+            .rearrange(pixel_labels, "(w pw) (h ph) -> w h (pw ph)", pw=pw, ph=ph)
             .mode(axis=-1)
             .values
         )
diff --git a/examples/inference.py b/examples/inference.py
index 2b44390..d143132 100644
--- a/examples/inference.py
+++ b/examples/inference.py
@@ -213,778 +213,776 @@ def _():
 def _():
     DINOV2_IMAGENET1K_SCALAR = 2.0181241035461426
 
-    DINOV2_IMAGENET1K_MEAN = torch.tensor(
-        [
-            0.1450997292995453,
-            -1.0630134344100952,
-            -0.3518574833869934,
-            -0.38624095916748047,
-            -0.4866980314254761,
-            -0.28983384370803833,
-            0.9997676014900208,
-            -1.231179118156433,
-            -0.7889889478683472,
-            -0.4450306296348572,
-            -0.09231726080179214,
-            0.13243812322616577,
-            0.09571082890033722,
-            -0.29342857003211975,
-            0.05933428555727005,
-            -0.21923032402992249,
-            0.08959043025970459,
-            -0.6981018781661987,
-            0.4853704869747162,
-            -0.29948222637176514,
-            0.3107207119464874,
-            -0.3812718093395233,
-            -0.5013473033905029,
-            2.88395094871521,
-            -0.5611682534217834,
-            -0.3514024615287781,
-            0.025546086952090263,
-            -0.24438244104385376,
-            -0.23365195095539093,
-            -0.2533780336380005,
-            0.4445696473121643,
-            1.1176759004592896,
-            -0.4188934564590454,
-            0.09051182866096497,
-            -0.04133417829871178,
-            -0.008052834309637547,
-            -0.5118610858917236,
-            0.22084011137485504,
-            -0.7333402633666992,
-            0.8644523620605469,
-            -0.43727627396583557,
-            -0.22333095967769623,
-            -1.5415295362472534,
-            -0.24187016487121582,
-            -0.33239033818244934,
-            -1.2828021049499512,
-            -0.21485395729541779,
-            0.6667488813400269,
-            -0.25890952348709106,
-            -0.8630414009094238,
-            1.5059994459152222,
-            -0.00952776987105608,
-            0.18695995211601257,
-            0.0200128685683012,
-            -0.221832275390625,
-            1.2800148725509644,
-            -0.1416555792093277,
-            0.61446613073349,
-            0.053658585995435715,
-            -0.08877403289079666,
-            1.0190010070800781,
-            -0.308927446603775,
-            -0.3903353214263916,
-            -0.35504740476608276,
-            -0.7907304763793945,
-            -0.18439480662345886,
-            -0.1797204464673996,
-            0.8199827075004578,
-            -0.1736353039741516,
-            -0.16373644769191742,
-            0.7541728019714355,
-            -0.3236996829509735,
-            0.8245170712471008,
-            0.3411649167537689,
-            -0.21873517334461212,
-            -0.7620954513549805,
-            -0.10635858029127121,
-            -0.592278003692627,
-            0.8314691781997681,
-            -0.2021609991788864,
-            -0.24301563203334808,
-            -0.03504444658756256,
-            -0.061244938522577286,
-            -0.36000630259513855,
-            -0.38578882813453674,
-            -1.2314008474349976,
-            -0.3416382968425751,
-            0.5925644636154175,
-            0.32259607315063477,
-            0.13169726729393005,
-            -0.131134033203125,
-            0.05763484537601471,
-            -0.7130515575408936,
-            -0.5685354471206665,
-            0.04428980499505997,
-            0.9245452880859375,
-            0.37724241614341736,
-            -0.4426809549331665,
-            0.5091503262519836,
-            -0.08006338775157928,
-            -0.18945513665676117,
-            -0.770736575126648,
-            -0.3588047921657562,
-            0.04727765917778015,
-            -0.16137081384658813,
-            -0.021555813029408455,
-            0.6381930708885193,
-            0.30161890387535095,
-            -0.0710706040263176,
-            -0.13884945213794708,
-            -0.22726555168628693,
-            -0.6134527921676636,
-            0.2969088852405548,
-            -0.2334780991077423,
-            -0.46334928274154663,
-            -0.3058214485645294,
-            0.5196799039840698,
-            0.6341780424118042,
-            0.12271945178508759,
-            -1.0072089433670044,
-            -0.1198473796248436,
-            -0.24667270481586456,
-            -0.19228138029575348,
-            -0.3955901861190796,
-            -0.19902971386909485,
-            0.7407659292221069,
-            2.3908257484436035,
-            0.02820657566189766,
-            0.07064329087734222,
-            -0.2637694776058197,
-            0.2560977339744568,
-            0.3973558247089386,
-            -0.17345857620239258,
-            -0.9541534185409546,
-            -0.21434728801250458,
-            0.41178393363952637,
-            -0.008175228722393513,
-            0.5115303993225098,
-            -0.9667210578918457,
-            1.6499103307724,
-            -1.8320564031600952,
-            1.1143667697906494,
-            0.24006624519824982,
-            -0.02112947776913643,
-            -0.4952388405799866,
-            1.1000680923461914,
-            -0.4901401102542877,
-            0.22758258879184723,
-            -0.6699370741844177,
-            0.6926363706588745,
-            -0.5719613432884216,
-            0.008403707295656204,
-            2.0220773220062256,
-            -0.1789812445640564,
-            -0.8777256011962891,
-            0.3709064722061157,
-            -0.2629733681678772,
-            0.08407248556613922,
-            -0.27063870429992676,
-            0.09993340820074081,
-            -0.3755860924720764,
-            0.07000888139009476,
-            0.3775370419025421,
-            0.5653945207595825,
-            -0.11404427886009216,
-            -0.06088113784790039,
-            -0.0898045226931572,
-            0.19868576526641846,
-            0.14287644624710083,
-            -0.669394314289093,
-            -0.07882463932037354,
-            -0.12379930168390274,
-            -0.010277876630425453,
-            -0.5625343918800354,
-            -0.6508009433746338,
-            0.06929764896631241,
-            -2.0470166206359863,
-            1.0193544626235962,
-            -0.9747569561004639,
-            -0.25624850392341614,
-            -0.04412469267845154,
-            -0.01941649615764618,
-            0.04781557247042656,
-            -0.2561051845550537,
-            -0.09596704691648483,
-            -1.0529744625091553,
-            -0.32774603366851807,
-            -0.1931363344192505,
-            -0.36885082721710205,
-            -0.9351740479469299,
-            -0.47905397415161133,
-            -0.678762674331665,
-            2.336048126220703,
-            0.26323413848876953,
-            -0.36512619256973267,
-            -0.3650853633880615,
-            -0.8287989497184753,
-            0.5866581201553345,
-            -0.420742005109787,
-            0.008546118624508381,
-            -0.7811568975448608,
-            -0.34993329644203186,
-            -0.373068243265152,
-            0.028424998745322227,
-            -0.537581205368042,
-            -0.15937983989715576,
-            -0.5638740062713623,
-            -0.4413940906524658,
-            -0.05887509509921074,
-            -0.12291032075881958,
-            -0.26565149426460266,
-            -0.23059803247451782,
-            -0.2925986349582672,
-            0.04849022254347801,
-            -0.4770037531852722,
-            0.040383752435445786,
-            -0.8186637759208679,
-            -0.062463242560625076,
-            -0.3251510262489319,
-            -0.4319412112236023,
-            -0.34569647908210754,
-            0.9713658690452576,
-            -0.25668394565582275,
-            -0.37531179189682007,
-            0.5259386301040649,
-            -0.06112021207809448,
-            0.06980857998132706,
-            -0.38363778591156006,
-            -0.1948518007993698,
-            -0.7897586822509766,
-            -0.600932776927948,
-            -0.4269576072692871,
-            -0.32002967596054077,
-            0.08897170424461365,
-            -0.3079395294189453,
-            -0.05779555067420006,
-            -0.782086968421936,
-            1.9608103036880493,
-            0.1145739033818245,
-            0.06164107844233513,
-            -0.3024725317955017,
-            -0.6308553218841553,
-            -0.7640243172645569,
-            -4.433685302734375,
-            -0.31690648198127747,
-            -0.019084235653281212,
-            -0.09761863201856613,
-            -0.029514605179429054,
-            -0.5096182823181152,
-            1.112805962562561,
-            -0.3302820324897766,
-            -0.23730400204658508,
-            0.044646695256233215,
-            -0.805400013923645,
-            -7.766678333282471,
-            -0.2016162872314453,
-            -0.5018128752708435,
-            0.6819560527801514,
-            -0.2735823392868042,
-            -2.2288968563079834,
-            -0.36170846223831177,
-            -0.7745882868766785,
-            0.4644778370857239,
-            0.2525951564311981,
-            -0.22642317414283752,
-            -0.5394997596740723,
-            -0.5064775347709656,
-            -0.5716705918312073,
-            0.19713695347309113,
-            -0.5411649942398071,
-            -0.17092496156692505,
-            0.45778003334999084,
-            0.6894896030426025,
-            -0.21671152114868164,
-            -0.9160588383674622,
-            -0.10307890176773071,
-            0.11703722178936005,
-            -0.7433905601501465,
-            -1.5170584917068481,
-            2.163774013519287,
-            -1.542649507522583,
-            -0.1601075381040573,
-            -0.5249155163764954,
-            0.44509291648864746,
-            -0.5261067152023315,
-            -0.02273540571331978,
-            -0.28311043977737427,
-            0.9144242405891418,
-            0.43954336643218994,
-            -0.2469814419746399,
-            0.18752114474773407,
-            -0.6066163778305054,
-            -0.14480441808700562,
-            -0.3546217679977417,
-            -0.11870954185724258,
-            -0.09891107678413391,
-            -0.377458781003952,
-            0.33304381370544434,
-            -0.156569704413414,
-            -0.9730328321456909,
-            -0.5034677386283875,
-            0.042613230645656586,
-            0.08271210640668869,
-            -0.2368200123310089,
-            -0.07397157698869705,
-            0.011974042281508446,
-            -0.2115129977464676,
-            -0.3752884566783905,
-            -0.24985794723033905,
-            -0.25223013758659363,
-            1.8311675786972046,
-            -0.1650543361902237,
-            -0.031050190329551697,
-            0.10702164471149445,
-            0.8963613510131836,
-            -0.9483885169029236,
-            -0.8156309723854065,
-            -1.7132004499435425,
-            0.08163392543792725,
-            0.4886241555213928,
-            -0.016470594331622124,
-            -0.37671732902526855,
-            -0.025105634704232216,
-            -0.2695018947124481,
-            -0.8450148701667786,
-            -0.9802296757698059,
-            -0.21868866682052612,
-            -0.5872927308082581,
-            1.019242763519287,
-            0.01872517168521881,
-            0.5087792873382568,
-            0.06771136820316315,
-            1.4142885208129883,
-            0.13146139681339264,
-            -0.36489933729171753,
-            0.37572142481803894,
-            -0.3490581810474396,
-            -0.13830198347568512,
-            -1.8019393682479858,
-            1.5129766464233398,
-            0.07059808075428009,
-            1.7206473350524902,
-            0.02890164405107498,
-            0.3628808557987213,
-            0.3914141058921814,
-            0.4993101954460144,
-            0.3969678580760956,
-            -0.058554816991090775,
-            -0.3434300422668457,
-            -0.4157616198062897,
-            -0.7624511122703552,
-            -0.3997197449207306,
-            1.4573990106582642,
-            -0.3363801836967468,
-            -0.46490129828453064,
-            -0.7445303797721863,
-            -0.3460237979888916,
-            -0.6315308809280396,
-            0.8536337018013,
-            -0.08939796686172485,
-            -0.21093742549419403,
-            -0.08742645382881165,
-            -0.020040960982441902,
-            0.09354449808597565,
-            -0.809800386428833,
-            -0.0018062496092170477,
-            -1.0083088874816895,
-            0.3428219258785248,
-            0.012708818539977074,
-            -0.3535612225532532,
-            1.9481208324432373,
-            0.013826621696352959,
-            -0.026771225035190582,
-            0.18734635412693024,
-            0.9365230798721313,
-            1.247671025339514e-05,
-            -0.4420109987258911,
-            0.10769690573215485,
-            -0.6858118176460266,
-            -0.24754805862903595,
-            1.0027467012405396,
-            -0.26436665654182434,
-            -0.33883318305015564,
-            0.38209766149520874,
-            0.479579895734787,
-            -0.5910238027572632,
-            0.1890297830104828,
-            -0.29854580760002136,
-            -0.5636696219444275,
-            -0.504091739654541,
-            -0.32814571261405945,
-            -0.748496949672699,
-            -0.3217906653881073,
-            -0.12439341843128204,
-            -0.3949342668056488,
-            0.09739203751087189,
-            -0.4254276752471924,
-            0.8690429329872131,
-            -0.26380032300949097,
-            -1.2738139629364014,
-            -0.12694764137268066,
-            -0.7331164479255676,
-            0.11337947845458984,
-            -0.7573927640914917,
-            -0.41507089138031006,
-            -0.18960340321063995,
-            1.2390563488006592,
-            -0.10859012603759766,
-            -0.021934548392891884,
-            -0.05041227489709854,
-            -0.055214136838912964,
-            0.20024456083774567,
-            -0.2689618766307831,
-            -0.3135489821434021,
-            -0.07520166784524918,
-            -0.5906742811203003,
-            0.2828388512134552,
-            0.05117213353514671,
-            1.4600849151611328,
-            -0.1967628449201584,
-            0.011182722635567188,
-            0.028878701850771904,
-            -0.12146933376789093,
-            0.6056286096572876,
-            0.22920559346675873,
-            -0.008979334495961666,
-            -0.2874019742012024,
-            -0.4887332320213318,
-            0.8754663467407227,
-            -0.05393843352794647,
-            -0.2956174910068512,
-            -0.18953847885131836,
-            -0.19063766300678253,
-            -0.8141281008720398,
-            0.11052622646093369,
-            -0.020359158515930176,
-            -0.1262499988079071,
-            -1.7762614488601685,
-            -0.4864279627799988,
-            -0.8644945621490479,
-            0.1278448849916458,
-            1.1127605438232422,
-            -0.595068097114563,
-            -0.06630692631006241,
-            1.5608118772506714,
-            -0.9473971724510193,
-            -0.1827543079853058,
-            -0.25564679503440857,
-            -0.4378860294818878,
-            -0.8285927176475525,
-            -1.1397618055343628,
-            -0.06226593255996704,
-            -0.09025824069976807,
-            -0.518083393573761,
-            -0.893482506275177,
-            0.5022943615913391,
-            -0.5922176837921143,
-            0.2571451961994171,
-            0.25571396946907043,
-            0.832092821598053,
-            -0.061823680996894836,
-            -0.08963754773139954,
-            -0.42173218727111816,
-            -0.4375287890434265,
-            -0.43921560049057007,
-            0.5626742243766785,
-            -0.011294233612716198,
-            0.626301646232605,
-            -0.28029197454452515,
-            0.15464802086353302,
-            -0.7071759700775146,
-            -0.0337684191763401,
-            -0.20901329815387726,
-            -0.29788798093795776,
-            0.6644192934036255,
-            -0.049459852278232574,
-            0.039552830159664154,
-            -0.2790898084640503,
-            0.3250356614589691,
-            -0.12668772041797638,
-            -0.46142634749412537,
-            -0.35542988777160645,
-            -1.1817448139190674,
-            0.007615066133439541,
-            -0.43865758180618286,
-            -0.16142761707305908,
-            -0.37852972745895386,
-            -0.582589328289032,
-            0.4371003210544586,
-            -0.2603273391723633,
-            -0.03284638375043869,
-            0.8895729184150696,
-            -0.025997856631875038,
-            0.5761443376541138,
-            -0.28437164425849915,
-            -0.11191761493682861,
-            -0.07794637233018875,
-            0.02127309888601303,
-            -0.10069284588098526,
-            -0.2177346795797348,
-            -1.029278039932251,
-            -0.5014596581459045,
-            -0.5774326920509338,
-            -0.2856050431728363,
-            -0.24715296924114227,
-            0.1243511438369751,
-            0.042631667107343674,
-            -0.846584677696228,
-            -0.7308683395385742,
-            -0.09307371079921722,
-            -0.35250845551490784,
-            0.12801845371723175,
-            -0.5423708558082581,
-            -0.22422067821025848,
-            1.574460744857788,
-            -0.27640238404273987,
-            -0.37266722321510315,
-            -0.12533603608608246,
-            0.3177711069583893,
-            -0.4530303478240967,
-            0.24940718710422516,
-            -0.1272897720336914,
-            0.6882254481315613,
-            -0.2153051793575287,
-            -0.6189695000648499,
-            -0.38704702258110046,
-            -0.14360225200653076,
-            -0.08159925043582916,
-            0.4714410603046417,
-            -0.16035029292106628,
-            0.005880486220121384,
-            -0.5742312669754028,
-            -0.33733850717544556,
-            -0.39702731370925903,
-            -0.14614750444889069,
-            -0.06936132907867432,
-            0.2528288662433624,
-            -0.25900882482528687,
-            0.45907658338546753,
-            -0.20694994926452637,
-            0.4083366394042969,
-            -0.9925484657287598,
-            -0.17098328471183777,
-            0.3215583860874176,
-            -0.33823585510253906,
-            -0.07112737745046616,
-            -0.05322866141796112,
-            0.19237284362316132,
-            -0.6257429122924805,
-            0.23328493535518646,
-            -0.17247024178504944,
-            -0.3362499177455902,
-            -0.17041970789432526,
-            -0.014526017010211945,
-            -0.12138030678033829,
-            0.0698552280664444,
-            -0.609315037727356,
-            0.8142863512039185,
-            -2.295081615447998,
-            -0.07903101295232773,
-            -0.48268306255340576,
-            -0.2097805291414261,
-            -0.4481655955314636,
-            -1.059373378753662,
-            0.17675237357616425,
-            -0.5335419774055481,
-            0.7713444232940674,
-            0.6341530084609985,
-            1.1411781311035156,
-            -0.18365903198719025,
-            -0.4029919505119324,
-            -0.34328755736351013,
-            -1.1935101747512817,
-            -0.4249494671821594,
-            0.10720300674438477,
-            -0.13509584963321686,
-            -0.610278844833374,
-            -0.1007867231965065,
-            -0.13094481825828552,
-            0.3319343030452728,
-            -0.22466504573822021,
-            -0.33384865522384644,
-            -0.3001727759838104,
-            -0.48621413111686707,
-            0.10271137952804565,
-            -0.3953743577003479,
-            -0.3412061631679535,
-            -1.3808176517486572,
-            -0.3035687804222107,
-            0.27737119793891907,
-            -0.10266303271055222,
-            -0.472690224647522,
-            0.03376518189907074,
-            -0.2053908109664917,
-            -0.46477705240249634,
-            -0.0046875146217644215,
-            0.8462978005409241,
-            -0.7554765343666077,
-            -0.9736349582672119,
-            -0.14118513464927673,
-            -0.2665828466415405,
-            -0.9371470212936401,
-            -0.007497116923332214,
-            0.6816821098327637,
-            0.20980679988861084,
-            -0.5602611303329468,
-            -0.7874919176101685,
-            -0.01479698158800602,
-            -0.45345690846443176,
-            -0.12117742747068405,
-            -0.5790822505950928,
-            -0.27737149596214294,
-            0.08818025887012482,
-            -0.25239622592926025,
-            1.1271374225616455,
-            0.0044799973256886005,
-            0.2183203548192978,
-            -2.0634095668792725,
-            -0.007129574194550514,
-            0.32677894830703735,
-            0.019878007471561432,
-            0.060301825404167175,
-            -0.6844122409820557,
-            0.35185739398002625,
-            -0.0028550554998219013,
-            -0.5629953145980835,
-            0.06621643155813217,
-            -0.043473124504089355,
-            -0.3398932218551636,
-            -0.1782192587852478,
-            -0.24575252830982208,
-            -0.20299431681632996,
-            -0.3652290999889374,
-            -0.9888001680374146,
-            -0.30628740787506104,
-            0.6184420585632324,
-            -0.33409008383750916,
-            0.20486755669116974,
-            -0.8251897692680359,
-            -0.08471876382827759,
-            -0.5613390803337097,
-            0.057765014469623566,
-            0.5359746813774109,
-            -0.7063419818878174,
-            0.28122395277023315,
-            -0.004502696450799704,
-            -0.6543170213699341,
-            0.04663177207112312,
-            -0.05775964632630348,
-            -6.37779594399035e-05,
-            0.46121329069137573,
-            -0.004464420489966869,
-            1.4332563877105713,
-            0.20597098767757416,
-            -0.17879879474639893,
-            0.4316228926181793,
-            -1.2352955341339111,
-            -0.19363455474376678,
-            -0.32174810767173767,
-            -0.23037514090538025,
-            0.17044368386268616,
-            0.13070613145828247,
-            1.2171069383621216,
-            -1.171966314315796,
-            0.04596511274576187,
-            -0.1690378040075302,
-            -0.030221890658140182,
-            0.3216114342212677,
-            -0.08577033132314682,
-            -0.26656001806259155,
-            -0.4321160316467285,
-            -0.22010475397109985,
-            -0.6187731623649597,
-            -0.4711909890174866,
-            -0.3499036431312561,
-            0.13558903336524963,
-            -0.2124641239643097,
-            -0.28327351808547974,
-            0.12788993120193481,
-            -1.3083688020706177,
-            -0.0332779586315155,
-            -0.4718656837940216,
-            1.031941533088684,
-            -0.07811620831489563,
-            -0.5331435799598694,
-            -0.2602376341819763,
-            -0.8461449146270752,
-            0.18593788146972656,
-            0.5763140320777893,
-            -0.45714831352233887,
-            -0.1056162416934967,
-            0.2665534019470215,
-            -0.4580163061618805,
-            -0.25224190950393677,
-            -0.2334505170583725,
-            -0.6723064184188843,
-            0.12331533432006836,
-            0.054681699723005295,
-            -0.14116793870925903,
-            -0.10254379361867905,
-            2.0082550048828125,
-            -1.4980225563049316,
-            0.00379346776753664,
-            -0.8470208644866943,
-            0.06866040825843811,
-            -0.3133383095264435,
-            -0.20381635427474976,
-            -0.03295162320137024,
-            1.1624072790145874,
-            -1.2590479850769043,
-            -0.5051106810569763,
-            -0.5310556292533875,
-            0.11350126564502716,
-            -0.5141156315803528,
-            1.0333826541900635,
-            -0.5528491735458374,
-            -0.6508246064186096,
-            -1.0594176054000854,
-            -0.03546600416302681,
-            -0.0008655009442009032,
-            0.06422116607427597,
-            -0.5845358371734619,
-            -0.049052149057388306,
-            -0.578079104423523,
-            -0.46709108352661133,
-            -0.6544204354286194,
-            -0.13105393946170807,
-            -0.12359122931957245,
-            0.19125737249851227,
-            -0.9108084440231323,
-            -0.24640944600105286,
-            -0.5813102126121521,
-            -0.2342103123664856,
-            0.645296573638916,
-            0.4200597405433655,
-            1.030412197113037,
-            0.026015933603048325,
-            0.03929654508829117,
-            -0.18394766747951508,
-            -0.2946997582912445,
-            0.029773380607366562,
-            -1.1292797327041626,
-            -0.3272054195404053,
-            -0.19441728293895721,
-            -0.8372487425804138,
-            0.5765964984893799,
-            -0.28797629475593567,
-            -0.6211466789245605,
-            0.09933445602655411,
-            -0.5617806911468506,
-            1.163861870765686,
-            0.1421220600605011,
-            -0.790323793888092,
-            -0.4003753960132599,
-            -0.6941299438476562,
-            -0.5033494830131531,
-            -0.2234964221715927,
-            -0.12398113310337067,
-            -0.26237404346466064,
-            -0.4991702139377594,
-            -0.7963886260986328,
-            -0.012063371017575264,
-            -1.1415417194366455,
-            0.40668150782585144,
-            0.33048388361930847,
-            1.3195141553878784,
-            -0.0008099540136754513,
-            -0.06793856620788574,
-        ]
-    )
+    DINOV2_IMAGENET1K_MEAN = torch.tensor([
+        0.1450997292995453,
+        -1.0630134344100952,
+        -0.3518574833869934,
+        -0.38624095916748047,
+        -0.4866980314254761,
+        -0.28983384370803833,
+        0.9997676014900208,
+        -1.231179118156433,
+        -0.7889889478683472,
+        -0.4450306296348572,
+        -0.09231726080179214,
+        0.13243812322616577,
+        0.09571082890033722,
+        -0.29342857003211975,
+        0.05933428555727005,
+        -0.21923032402992249,
+        0.08959043025970459,
+        -0.6981018781661987,
+        0.4853704869747162,
+        -0.29948222637176514,
+        0.3107207119464874,
+        -0.3812718093395233,
+        -0.5013473033905029,
+        2.88395094871521,
+        -0.5611682534217834,
+        -0.3514024615287781,
+        0.025546086952090263,
+        -0.24438244104385376,
+        -0.23365195095539093,
+        -0.2533780336380005,
+        0.4445696473121643,
+        1.1176759004592896,
+        -0.4188934564590454,
+        0.09051182866096497,
+        -0.04133417829871178,
+        -0.008052834309637547,
+        -0.5118610858917236,
+        0.22084011137485504,
+        -0.7333402633666992,
+        0.8644523620605469,
+        -0.43727627396583557,
+        -0.22333095967769623,
+        -1.5415295362472534,
+        -0.24187016487121582,
+        -0.33239033818244934,
+        -1.2828021049499512,
+        -0.21485395729541779,
+        0.6667488813400269,
+        -0.25890952348709106,
+        -0.8630414009094238,
+        1.5059994459152222,
+        -0.00952776987105608,
+        0.18695995211601257,
+        0.0200128685683012,
+        -0.221832275390625,
+        1.2800148725509644,
+        -0.1416555792093277,
+        0.61446613073349,
+        0.053658585995435715,
+        -0.08877403289079666,
+        1.0190010070800781,
+        -0.308927446603775,
+        -0.3903353214263916,
+        -0.35504740476608276,
+        -0.7907304763793945,
+        -0.18439480662345886,
+        -0.1797204464673996,
+        0.8199827075004578,
+        -0.1736353039741516,
+        -0.16373644769191742,
+        0.7541728019714355,
+        -0.3236996829509735,
+        0.8245170712471008,
+        0.3411649167537689,
+        -0.21873517334461212,
+        -0.7620954513549805,
+        -0.10635858029127121,
+        -0.592278003692627,
+        0.8314691781997681,
+        -0.2021609991788864,
+        -0.24301563203334808,
+        -0.03504444658756256,
+        -0.061244938522577286,
+        -0.36000630259513855,
+        -0.38578882813453674,
+        -1.2314008474349976,
+        -0.3416382968425751,
+        0.5925644636154175,
+        0.32259607315063477,
+        0.13169726729393005,
+        -0.131134033203125,
+        0.05763484537601471,
+        -0.7130515575408936,
+        -0.5685354471206665,
+        0.04428980499505997,
+        0.9245452880859375,
+        0.37724241614341736,
+        -0.4426809549331665,
+        0.5091503262519836,
+        -0.08006338775157928,
+        -0.18945513665676117,
+        -0.770736575126648,
+        -0.3588047921657562,
+        0.04727765917778015,
+        -0.16137081384658813,
+        -0.021555813029408455,
+        0.6381930708885193,
+        0.30161890387535095,
+        -0.0710706040263176,
+        -0.13884945213794708,
+        -0.22726555168628693,
+        -0.6134527921676636,
+        0.2969088852405548,
+        -0.2334780991077423,
+        -0.46334928274154663,
+        -0.3058214485645294,
+        0.5196799039840698,
+        0.6341780424118042,
+        0.12271945178508759,
+        -1.0072089433670044,
+        -0.1198473796248436,
+        -0.24667270481586456,
+        -0.19228138029575348,
+        -0.3955901861190796,
+        -0.19902971386909485,
+        0.7407659292221069,
+        2.3908257484436035,
+        0.02820657566189766,
+        0.07064329087734222,
+        -0.2637694776058197,
+        0.2560977339744568,
+        0.3973558247089386,
+        -0.17345857620239258,
+        -0.9541534185409546,
+        -0.21434728801250458,
+        0.41178393363952637,
+        -0.008175228722393513,
+        0.5115303993225098,
+        -0.9667210578918457,
+        1.6499103307724,
+        -1.8320564031600952,
+        1.1143667697906494,
+        0.24006624519824982,
+        -0.02112947776913643,
+        -0.4952388405799866,
+        1.1000680923461914,
+        -0.4901401102542877,
+        0.22758258879184723,
+        -0.6699370741844177,
+        0.6926363706588745,
+        -0.5719613432884216,
+        0.008403707295656204,
+        2.0220773220062256,
+        -0.1789812445640564,
+        -0.8777256011962891,
+        0.3709064722061157,
+        -0.2629733681678772,
+        0.08407248556613922,
+        -0.27063870429992676,
+        0.09993340820074081,
+        -0.3755860924720764,
+        0.07000888139009476,
+        0.3775370419025421,
+        0.5653945207595825,
+        -0.11404427886009216,
+        -0.06088113784790039,
+        -0.0898045226931572,
+        0.19868576526641846,
+        0.14287644624710083,
+        -0.669394314289093,
+        -0.07882463932037354,
+        -0.12379930168390274,
+        -0.010277876630425453,
+        -0.5625343918800354,
+        -0.6508009433746338,
+        0.06929764896631241,
+        -2.0470166206359863,
+        1.0193544626235962,
+        -0.9747569561004639,
+        -0.25624850392341614,
+        -0.04412469267845154,
+        -0.01941649615764618,
+        0.04781557247042656,
+        -0.2561051845550537,
+        -0.09596704691648483,
+        -1.0529744625091553,
+        -0.32774603366851807,
+        -0.1931363344192505,
+        -0.36885082721710205,
+        -0.9351740479469299,
+        -0.47905397415161133,
+        -0.678762674331665,
+        2.336048126220703,
+        0.26323413848876953,
+        -0.36512619256973267,
+        -0.3650853633880615,
+        -0.8287989497184753,
+        0.5866581201553345,
+        -0.420742005109787,
+        0.008546118624508381,
+        -0.7811568975448608,
+        -0.34993329644203186,
+        -0.373068243265152,
+        0.028424998745322227,
+        -0.537581205368042,
+        -0.15937983989715576,
+        -0.5638740062713623,
+        -0.4413940906524658,
+        -0.05887509509921074,
+        -0.12291032075881958,
+        -0.26565149426460266,
+        -0.23059803247451782,
+        -0.2925986349582672,
+        0.04849022254347801,
+        -0.4770037531852722,
+        0.040383752435445786,
+        -0.8186637759208679,
+        -0.062463242560625076,
+        -0.3251510262489319,
+        -0.4319412112236023,
+        -0.34569647908210754,
+        0.9713658690452576,
+        -0.25668394565582275,
+        -0.37531179189682007,
+        0.5259386301040649,
+        -0.06112021207809448,
+        0.06980857998132706,
+        -0.38363778591156006,
+        -0.1948518007993698,
+        -0.7897586822509766,
+        -0.600932776927948,
+        -0.4269576072692871,
+        -0.32002967596054077,
+        0.08897170424461365,
+        -0.3079395294189453,
+        -0.05779555067420006,
+        -0.782086968421936,
+        1.9608103036880493,
+        0.1145739033818245,
+        0.06164107844233513,
+        -0.3024725317955017,
+        -0.6308553218841553,
+        -0.7640243172645569,
+        -4.433685302734375,
+        -0.31690648198127747,
+        -0.019084235653281212,
+        -0.09761863201856613,
+        -0.029514605179429054,
+        -0.5096182823181152,
+        1.112805962562561,
+        -0.3302820324897766,
+        -0.23730400204658508,
+        0.044646695256233215,
+        -0.805400013923645,
+        -7.766678333282471,
+        -0.2016162872314453,
+        -0.5018128752708435,
+        0.6819560527801514,
+        -0.2735823392868042,
+        -2.2288968563079834,
+        -0.36170846223831177,
+        -0.7745882868766785,
+        0.4644778370857239,
+        0.2525951564311981,
+        -0.22642317414283752,
+        -0.5394997596740723,
+        -0.5064775347709656,
+        -0.5716705918312073,
+        0.19713695347309113,
+        -0.5411649942398071,
+        -0.17092496156692505,
+        0.45778003334999084,
+        0.6894896030426025,
+        -0.21671152114868164,
+        -0.9160588383674622,
+        -0.10307890176773071,
+        0.11703722178936005,
+        -0.7433905601501465,
+        -1.5170584917068481,
+        2.163774013519287,
+        -1.542649507522583,
+        -0.1601075381040573,
+        -0.5249155163764954,
+        0.44509291648864746,
+        -0.5261067152023315,
+        -0.02273540571331978,
+        -0.28311043977737427,
+        0.9144242405891418,
+        0.43954336643218994,
+        -0.2469814419746399,
+        0.18752114474773407,
+        -0.6066163778305054,
+        -0.14480441808700562,
+        -0.3546217679977417,
+        -0.11870954185724258,
+        -0.09891107678413391,
+        -0.377458781003952,
+        0.33304381370544434,
+        -0.156569704413414,
+        -0.9730328321456909,
+        -0.5034677386283875,
+        0.042613230645656586,
+        0.08271210640668869,
+        -0.2368200123310089,
+        -0.07397157698869705,
+        0.011974042281508446,
+        -0.2115129977464676,
+        -0.3752884566783905,
+        -0.24985794723033905,
+        -0.25223013758659363,
+        1.8311675786972046,
+        -0.1650543361902237,
+        -0.031050190329551697,
+        0.10702164471149445,
+        0.8963613510131836,
+        -0.9483885169029236,
+        -0.8156309723854065,
+        -1.7132004499435425,
+        0.08163392543792725,
+        0.4886241555213928,
+        -0.016470594331622124,
+        -0.37671732902526855,
+        -0.025105634704232216,
+        -0.2695018947124481,
+        -0.8450148701667786,
+        -0.9802296757698059,
+        -0.21868866682052612,
+        -0.5872927308082581,
+        1.019242763519287,
+        0.01872517168521881,
+        0.5087792873382568,
+        0.06771136820316315,
+        1.4142885208129883,
+        0.13146139681339264,
+        -0.36489933729171753,
+        0.37572142481803894,
+        -0.3490581810474396,
+        -0.13830198347568512,
+        -1.8019393682479858,
+        1.5129766464233398,
+        0.07059808075428009,
+        1.7206473350524902,
+        0.02890164405107498,
+        0.3628808557987213,
+        0.3914141058921814,
+        0.4993101954460144,
+        0.3969678580760956,
+        -0.058554816991090775,
+        -0.3434300422668457,
+        -0.4157616198062897,
+        -0.7624511122703552,
+        -0.3997197449207306,
+        1.4573990106582642,
+        -0.3363801836967468,
+        -0.46490129828453064,
+        -0.7445303797721863,
+        -0.3460237979888916,
+        -0.6315308809280396,
+        0.8536337018013,
+        -0.08939796686172485,
+        -0.21093742549419403,
+        -0.08742645382881165,
+        -0.020040960982441902,
+        0.09354449808597565,
+        -0.809800386428833,
+        -0.0018062496092170477,
+        -1.0083088874816895,
+        0.3428219258785248,
+        0.012708818539977074,
+        -0.3535612225532532,
+        1.9481208324432373,
+        0.013826621696352959,
+        -0.026771225035190582,
+        0.18734635412693024,
+        0.9365230798721313,
+        1.247671025339514e-05,
+        -0.4420109987258911,
+        0.10769690573215485,
+        -0.6858118176460266,
+        -0.24754805862903595,
+        1.0027467012405396,
+        -0.26436665654182434,
+        -0.33883318305015564,
+        0.38209766149520874,
+        0.479579895734787,
+        -0.5910238027572632,
+        0.1890297830104828,
+        -0.29854580760002136,
+        -0.5636696219444275,
+        -0.504091739654541,
+        -0.32814571261405945,
+        -0.748496949672699,
+        -0.3217906653881073,
+        -0.12439341843128204,
+        -0.3949342668056488,
+        0.09739203751087189,
+        -0.4254276752471924,
+        0.8690429329872131,
+        -0.26380032300949097,
+        -1.2738139629364014,
+        -0.12694764137268066,
+        -0.7331164479255676,
+        0.11337947845458984,
+        -0.7573927640914917,
+        -0.41507089138031006,
+        -0.18960340321063995,
+        1.2390563488006592,
+        -0.10859012603759766,
+        -0.021934548392891884,
+        -0.05041227489709854,
+        -0.055214136838912964,
+        0.20024456083774567,
+        -0.2689618766307831,
+        -0.3135489821434021,
+        -0.07520166784524918,
+        -0.5906742811203003,
+        0.2828388512134552,
+        0.05117213353514671,
+        1.4600849151611328,
+        -0.1967628449201584,
+        0.011182722635567188,
+        0.028878701850771904,
+        -0.12146933376789093,
+        0.6056286096572876,
+        0.22920559346675873,
+        -0.008979334495961666,
+        -0.2874019742012024,
+        -0.4887332320213318,
+        0.8754663467407227,
+        -0.05393843352794647,
+        -0.2956174910068512,
+        -0.18953847885131836,
+        -0.19063766300678253,
+        -0.8141281008720398,
+        0.11052622646093369,
+        -0.020359158515930176,
+        -0.1262499988079071,
+        -1.7762614488601685,
+        -0.4864279627799988,
+        -0.8644945621490479,
+        0.1278448849916458,
+        1.1127605438232422,
+        -0.595068097114563,
+        -0.06630692631006241,
+        1.5608118772506714,
+        -0.9473971724510193,
+        -0.1827543079853058,
+        -0.25564679503440857,
+        -0.4378860294818878,
+        -0.8285927176475525,
+        -1.1397618055343628,
+        -0.06226593255996704,
+        -0.09025824069976807,
+        -0.518083393573761,
+        -0.893482506275177,
+        0.5022943615913391,
+        -0.5922176837921143,
+        0.2571451961994171,
+        0.25571396946907043,
+        0.832092821598053,
+        -0.061823680996894836,
+        -0.08963754773139954,
+        -0.42173218727111816,
+        -0.4375287890434265,
+        -0.43921560049057007,
+        0.5626742243766785,
+        -0.011294233612716198,
+        0.626301646232605,
+        -0.28029197454452515,
+        0.15464802086353302,
+        -0.7071759700775146,
+        -0.0337684191763401,
+        -0.20901329815387726,
+        -0.29788798093795776,
+        0.6644192934036255,
+        -0.049459852278232574,
+        0.039552830159664154,
+        -0.2790898084640503,
+        0.3250356614589691,
+        -0.12668772041797638,
+        -0.46142634749412537,
+        -0.35542988777160645,
+        -1.1817448139190674,
+        0.007615066133439541,
+        -0.43865758180618286,
+        -0.16142761707305908,
+        -0.37852972745895386,
+        -0.582589328289032,
+        0.4371003210544586,
+        -0.2603273391723633,
+        -0.03284638375043869,
+        0.8895729184150696,
+        -0.025997856631875038,
+        0.5761443376541138,
+        -0.28437164425849915,
+        -0.11191761493682861,
+        -0.07794637233018875,
+        0.02127309888601303,
+        -0.10069284588098526,
+        -0.2177346795797348,
+        -1.029278039932251,
+        -0.5014596581459045,
+        -0.5774326920509338,
+        -0.2856050431728363,
+        -0.24715296924114227,
+        0.1243511438369751,
+        0.042631667107343674,
+        -0.846584677696228,
+        -0.7308683395385742,
+        -0.09307371079921722,
+        -0.35250845551490784,
+        0.12801845371723175,
+        -0.5423708558082581,
+        -0.22422067821025848,
+        1.574460744857788,
+        -0.27640238404273987,
+        -0.37266722321510315,
+        -0.12533603608608246,
+        0.3177711069583893,
+        -0.4530303478240967,
+        0.24940718710422516,
+        -0.1272897720336914,
+        0.6882254481315613,
+        -0.2153051793575287,
+        -0.6189695000648499,
+        -0.38704702258110046,
+        -0.14360225200653076,
+        -0.08159925043582916,
+        0.4714410603046417,
+        -0.16035029292106628,
+        0.005880486220121384,
+        -0.5742312669754028,
+        -0.33733850717544556,
+        -0.39702731370925903,
+        -0.14614750444889069,
+        -0.06936132907867432,
+        0.2528288662433624,
+        -0.25900882482528687,
+        0.45907658338546753,
+        -0.20694994926452637,
+        0.4083366394042969,
+        -0.9925484657287598,
+        -0.17098328471183777,
+        0.3215583860874176,
+        -0.33823585510253906,
+        -0.07112737745046616,
+        -0.05322866141796112,
+        0.19237284362316132,
+        -0.6257429122924805,
+        0.23328493535518646,
+        -0.17247024178504944,
+        -0.3362499177455902,
+        -0.17041970789432526,
+        -0.014526017010211945,
+        -0.12138030678033829,
+        0.0698552280664444,
+        -0.609315037727356,
+        0.8142863512039185,
+        -2.295081615447998,
+        -0.07903101295232773,
+        -0.48268306255340576,
+        -0.2097805291414261,
+        -0.4481655955314636,
+        -1.059373378753662,
+        0.17675237357616425,
+        -0.5335419774055481,
+        0.7713444232940674,
+        0.6341530084609985,
+        1.1411781311035156,
+        -0.18365903198719025,
+        -0.4029919505119324,
+        -0.34328755736351013,
+        -1.1935101747512817,
+        -0.4249494671821594,
+        0.10720300674438477,
+        -0.13509584963321686,
+        -0.610278844833374,
+        -0.1007867231965065,
+        -0.13094481825828552,
+        0.3319343030452728,
+        -0.22466504573822021,
+        -0.33384865522384644,
+        -0.3001727759838104,
+        -0.48621413111686707,
+        0.10271137952804565,
+        -0.3953743577003479,
+        -0.3412061631679535,
+        -1.3808176517486572,
+        -0.3035687804222107,
+        0.27737119793891907,
+        -0.10266303271055222,
+        -0.472690224647522,
+        0.03376518189907074,
+        -0.2053908109664917,
+        -0.46477705240249634,
+        -0.0046875146217644215,
+        0.8462978005409241,
+        -0.7554765343666077,
+        -0.9736349582672119,
+        -0.14118513464927673,
+        -0.2665828466415405,
+        -0.9371470212936401,
+        -0.007497116923332214,
+        0.6816821098327637,
+        0.20980679988861084,
+        -0.5602611303329468,
+        -0.7874919176101685,
+        -0.01479698158800602,
+        -0.45345690846443176,
+        -0.12117742747068405,
+        -0.5790822505950928,
+        -0.27737149596214294,
+        0.08818025887012482,
+        -0.25239622592926025,
+        1.1271374225616455,
+        0.0044799973256886005,
+        0.2183203548192978,
+        -2.0634095668792725,
+        -0.007129574194550514,
+        0.32677894830703735,
+        0.019878007471561432,
+        0.060301825404167175,
+        -0.6844122409820557,
+        0.35185739398002625,
+        -0.0028550554998219013,
+        -0.5629953145980835,
+        0.06621643155813217,
+        -0.043473124504089355,
+        -0.3398932218551636,
+        -0.1782192587852478,
+        -0.24575252830982208,
+        -0.20299431681632996,
+        -0.3652290999889374,
+        -0.9888001680374146,
+        -0.30628740787506104,
+        0.6184420585632324,
+        -0.33409008383750916,
+        0.20486755669116974,
+        -0.8251897692680359,
+        -0.08471876382827759,
+        -0.5613390803337097,
+        0.057765014469623566,
+        0.5359746813774109,
+        -0.7063419818878174,
+        0.28122395277023315,
+        -0.004502696450799704,
+        -0.6543170213699341,
+        0.04663177207112312,
+        -0.05775964632630348,
+        -6.37779594399035e-05,
+        0.46121329069137573,
+        -0.004464420489966869,
+        1.4332563877105713,
+        0.20597098767757416,
+        -0.17879879474639893,
+        0.4316228926181793,
+        -1.2352955341339111,
+        -0.19363455474376678,
+        -0.32174810767173767,
+        -0.23037514090538025,
+        0.17044368386268616,
+        0.13070613145828247,
+        1.2171069383621216,
+        -1.171966314315796,
+        0.04596511274576187,
+        -0.1690378040075302,
+        -0.030221890658140182,
+        0.3216114342212677,
+        -0.08577033132314682,
+        -0.26656001806259155,
+        -0.4321160316467285,
+        -0.22010475397109985,
+        -0.6187731623649597,
+        -0.4711909890174866,
+        -0.3499036431312561,
+        0.13558903336524963,
+        -0.2124641239643097,
+        -0.28327351808547974,
+        0.12788993120193481,
+        -1.3083688020706177,
+        -0.0332779586315155,
+        -0.4718656837940216,
+        1.031941533088684,
+        -0.07811620831489563,
+        -0.5331435799598694,
+        -0.2602376341819763,
+        -0.8461449146270752,
+        0.18593788146972656,
+        0.5763140320777893,
+        -0.45714831352233887,
+        -0.1056162416934967,
+        0.2665534019470215,
+        -0.4580163061618805,
+        -0.25224190950393677,
+        -0.2334505170583725,
+        -0.6723064184188843,
+        0.12331533432006836,
+        0.054681699723005295,
+        -0.14116793870925903,
+        -0.10254379361867905,
+        2.0082550048828125,
+        -1.4980225563049316,
+        0.00379346776753664,
+        -0.8470208644866943,
+        0.06866040825843811,
+        -0.3133383095264435,
+        -0.20381635427474976,
+        -0.03295162320137024,
+        1.1624072790145874,
+        -1.2590479850769043,
+        -0.5051106810569763,
+        -0.5310556292533875,
+        0.11350126564502716,
+        -0.5141156315803528,
+        1.0333826541900635,
+        -0.5528491735458374,
+        -0.6508246064186096,
+        -1.0594176054000854,
+        -0.03546600416302681,
+        -0.0008655009442009032,
+        0.06422116607427597,
+        -0.5845358371734619,
+        -0.049052149057388306,
+        -0.578079104423523,
+        -0.46709108352661133,
+        -0.6544204354286194,
+        -0.13105393946170807,
+        -0.12359122931957245,
+        0.19125737249851227,
+        -0.9108084440231323,
+        -0.24640944600105286,
+        -0.5813102126121521,
+        -0.2342103123664856,
+        0.645296573638916,
+        0.4200597405433655,
+        1.030412197113037,
+        0.026015933603048325,
+        0.03929654508829117,
+        -0.18394766747951508,
+        -0.2946997582912445,
+        0.029773380607366562,
+        -1.1292797327041626,
+        -0.3272054195404053,
+        -0.19441728293895721,
+        -0.8372487425804138,
+        0.5765964984893799,
+        -0.28797629475593567,
+        -0.6211466789245605,
+        0.09933445602655411,
+        -0.5617806911468506,
+        1.163861870765686,
+        0.1421220600605011,
+        -0.790323793888092,
+        -0.4003753960132599,
+        -0.6941299438476562,
+        -0.5033494830131531,
+        -0.2234964221715927,
+        -0.12398113310337067,
+        -0.26237404346466064,
+        -0.4991702139377594,
+        -0.7963886260986328,
+        -0.012063371017575264,
+        -1.1415417194366455,
+        0.40668150782585144,
+        0.33048388361930847,
+        1.3195141553878784,
+        -0.0008099540136754513,
+        -0.06793856620788574,
+    ])
     return DINOV2_IMAGENET1K_MEAN, DINOV2_IMAGENET1K_SCALAR
 
 
@@ -1012,7 +1010,6 @@ def _dino_normalize(x):
             x.clamp(-1e-5, 1e5) - DINOV2_IMAGENET1K_MEAN.to(x.device)
         ) / DINOV2_IMAGENET1K_SCALAR
 
-
     dino_patch_acts, dino_out = extract_features(
         dino_vit, dino_sae, img, dino_tr, normalize_fn=_dino_normalize
     )

From 01e12c2bd007314b9b64adffcd41726fb3ff985b Mon Sep 17 00:00:00 2001
From: Matthew Thompson <thompson.4509@osu.edu>
Date: Fri, 12 Jun 2026 13:21:22 -0400
Subject: [PATCH 7/9] Add local zenodo validation capabilities

---
 justfile                     |  3 +++
 scripts/validation-zenodo.sh | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)
 create mode 100755 scripts/validation-zenodo.sh

diff --git a/justfile b/justfile
index 1455a45..7c77dd2 100644
--- a/justfile
+++ b/justfile
@@ -4,6 +4,9 @@ docs: lint
     -yek src/saev README.md AGENTS.md > docs/api/llms.txt
     uv run mkdocs build --config-file docs/mkdocs.yml
 
+validate-zenodo:
+    sh scripts/validation-zenodo.sh
+
 test:
     uv run pytest -m "not slow and not integration" tests
 
diff --git a/scripts/validation-zenodo.sh b/scripts/validation-zenodo.sh
new file mode 100755
index 0000000..cfcf966
--- /dev/null
+++ b/scripts/validation-zenodo.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# Validate .zenodo.json with the same pinned zenodraft as CI.
+# Tries npx, bunx, dx, then deno, and exec's the first runner found on PATH.
+
+set -eu
+
+# Resolve paths against the repo root (parent of scripts/), not the caller's cwd.
+CDPATH= cd "$(dirname "$0")/.."
+workflow=".github/workflows/validate-zenodo.yaml"
+
+# e.g. "zenodraft@0.14.1" from the `npm install zenodraft@0.14.1` line.
+spec=$(grep -oE 'zenodraft@[0-9]+\.[0-9]+\.[0-9]+' "$workflow" | head -n 1)
+if [ -z "$spec" ]; then
+    echo "could not read pinned zenodraft version from $workflow" >&2
+    exit 1
+fi
+
+# All runners but deno take the spec directly; deno is given an npm: specifier.
+for runner in npx bunx dx deno; do
+    command -v "$runner" >/dev/null 2>&1 || continue
+    [ "$runner" = deno ] && exec deno run --allow-read "npm:$spec" metadata validate .zenodo.json
+    exec "$runner" "$spec" metadata validate .zenodo.json
+done
+
+echo "no JS runner found (tried: npx bunx dx deno)" >&2
+exit 1

From 1c7a568fa9523077b50df26984f5bdaa14ff41c0 Mon Sep 17 00:00:00 2001
From: Matthew Thompson <thompson.4509@osu.edu>
Date: Fri, 12 Jun 2026 13:39:40 -0400
Subject: [PATCH 8/9] Instruct citation via GitHub link

---
 README.md | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 9ac798f..0734b57 100644
--- a/README.md
+++ b/README.md
@@ -24,15 +24,6 @@ Trained SAE checkpoints are available at:
 
 - [Huggingface Models](https://huggingface.co/collections/osunlp/sae-v-67ab8c4fdf179d117db28195)
 
-If you want to cite the software, please cite it as:
-
-```bib
-@software{stevens2025saev,
-  author = {Stevens, Samuel},
-  month = apr,
-  title = {{saev}},
-  url = {https://github.com/Imageomics/saev},
-  year = {2025}
-}
-```
+## Citation
 
+To cite this software, use the "Cite this repository" link in the sidebar of the repository's GitHub page; it provides a ready-to-copy citation in APA and BibTeX formats.

From cddf8ed1e116a6d604f9f38386080f669ed2d76b Mon Sep 17 00:00:00 2001
From: egrace479 <e.campolongo479@gmail.com>
Date: Thu, 18 Jun 2026 14:56:37 -0400
Subject: [PATCH 9/9] Set release date

---
 .zenodo.json | 2 +-
 CITATION.cff | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.zenodo.json b/.zenodo.json
index d4ae2b4..8fa7c62 100644
--- a/.zenodo.json
+++ b/.zenodo.json
@@ -16,7 +16,7 @@
     "title": "saev: Sparse Autoencoders for Vision Transformers",
     "version": "0.1.0",
     "license": "MIT",
-    "publication_date": "2026-06-10",
+    "publication_date": "2026-06-18",
     "grants": [
         {
             "id": "021nxhr62::2118240"
diff --git a/CITATION.cff b/CITATION.cff
index e716abc..99e77f4 100644
--- a/CITATION.cff
+++ b/CITATION.cff
@@ -24,7 +24,7 @@ keywords:
   - interpretability
   - computer vision
 license: MIT
-date-released: '2026-06-10'
+date-released: '2026-06-18'
 identifiers:
   - description: "The GitHub release URL of tag v0.1.0."
     type: url